diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000000000000000000000000000000000000..854773350cc5a92bf40312f8dce0f452166c19d6 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,32 @@ +# SPDX-License-Identifier: GPL-2.0-only + +root = true + +[{*.{awk,c,dts,dtsi,dtso,h,mk,s,S},Kconfig,Makefile,Makefile.*}] +charset = utf-8 +end_of_line = lf +trim_trailing_whitespace = true +insert_final_newline = true +indent_style = tab +indent_size = 8 + +[*.{json,py,rs}] +charset = utf-8 +end_of_line = lf +trim_trailing_whitespace = true +insert_final_newline = true +indent_style = space +indent_size = 4 + +# this must be below the general *.py to overwrite it +[tools/{perf,power,rcu,testing/kunit}/**.py,] +indent_style = tab +indent_size = 8 + +[*.yaml] +charset = utf-8 +end_of_line = lf +trim_trailing_whitespace = unset +insert_final_newline = true +indent_style = space +indent_size = 2 diff --git a/.gitignore b/.gitignore index 98274e1160d7b11729f307df26f3e93427705f8d..689a4fa3f5477aa0fd46997eca5ee7a29cd78f8a 100644 --- a/.gitignore +++ b/.gitignore @@ -96,6 +96,7 @@ modules.order # !.clang-format !.cocciconfig +!.editorconfig !.get_maintainer.ignore !.gitattributes !.gitignore diff --git a/CREDITS b/CREDITS index d36c1949fa2c253175d854cebcd47c0d81a48f0d..5797e8f7e92b06f8736c01c6c191815c4802b6fd 100644 --- a/CREDITS +++ b/CREDITS @@ -179,6 +179,7 @@ E: ralf@gnu.org P: 1024/AF7B30C1 CF 97 C2 CC 6D AE A7 FE C8 BA 9C FC 88 DE 32 C3 D: Linux/MIPS port D: Linux/68k hacker +D: AX25 maintainer S: Hauptstrasse 19 S: 79837 St. 
Blasien S: Germany @@ -677,6 +678,10 @@ D: Media subsystem (V4L/DVB) drivers and core D: EDAC drivers and EDAC 3.0 core rework S: Brazil +N: Landen Chao +E: Landen.Chao@mediatek.com +D: MT7531 Ethernet switch support + N: Raymond Chen E: raymondc@microsoft.com D: Author of Configure script @@ -814,6 +819,10 @@ D: Support for Xircom PGSDB9 (firmware and host driver) S: Bucharest S: Romania +N: John Crispin +E: john@phrozen.org +D: MediaTek MT7623 Gigabit ethernet support + N: Laurence Culhane E: loz@holmes.demon.co.uk D: Wrote the initial alpha SLIP code @@ -1538,6 +1547,10 @@ N: Andrew Haylett E: ajh@primag.co.uk D: Selection mechanism +N: Johan Hedberg +E: johan.hedberg@gmail.com +D: Bluetooth subsystem maintainer + N: Andre Hedrick E: andre@linux-ide.org E: andre@linuxdiskcert.org @@ -3052,6 +3065,10 @@ S: Demonstratsii 8-382 S: Tula 300000 S: Russia +N: Thomas Petazzoni +E: thomas.petazzoni@bootlin.com +D: Driver for the Marvell Armada 370/XP network unit. + N: Gordon Peters E: GordPeters@smarttech.com D: Isochronous receive for IEEE 1394 driver (OHCI module). diff --git a/Documentation/ABI/testing/debugfs-vfio b/Documentation/ABI/testing/debugfs-vfio new file mode 100644 index 0000000000000000000000000000000000000000..90f7c262f591306bdb99295ab4e857ca0e0b537a --- /dev/null +++ b/Documentation/ABI/testing/debugfs-vfio @@ -0,0 +1,25 @@ +What: /sys/kernel/debug/vfio +Date: December 2023 +KernelVersion: 6.8 +Contact: Longfang Liu +Description: This debugfs file directory is used for debugging + of vfio devices, it's a common directory for all vfio devices. + Vfio core will create a device subdirectory under this + directory. + +What: /sys/kernel/debug/vfio/<device>/migration +Date: December 2023 +KernelVersion: 6.8 +Contact: Longfang Liu +Description: This debugfs file directory is used for debugging + of vfio devices that support live migration. + The debugfs of each vfio device that supports live migration + could be created under this directory. 
+ +What: /sys/kernel/debug/vfio/<device>/migration/state +Date: December 2023 +KernelVersion: 6.8 +Contact: Longfang Liu +Description: Read the live migration status of the vfio device. + The contents of the state file reflect the migration state + relative to those defined in the vfio_device_mig_state enum diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl index e76c3600607f8cc697e0ec2eb6753fffa6eb1647..fff2581b80335891247c5fe1a86cb410ea8a559b 100644 --- a/Documentation/ABI/testing/sysfs-bus-cxl +++ b/Documentation/ABI/testing/sysfs-bus-cxl @@ -28,6 +28,23 @@ Description: Payload in the CXL-2.0 specification. +What: /sys/bus/cxl/devices/memX/ram/qos_class +Date: May, 2023 +KernelVersion: v6.8 +Contact: linux-cxl@vger.kernel.org +Description: + (RO) For CXL host platforms that support "QoS Telemetry" + this attribute conveys a comma delimited list of platform + specific cookies that identifies a QoS performance class + for the volatile partition of the CXL mem device. These + class-ids can be compared against a similar "qos_class" + published for a root decoder. While it is not required + that the endpoints map their local memory-class to a + matching platform class, mismatches are not recommended + and there are platform specific performance related + side-effects that may result. First class-id is displayed. + + What: /sys/bus/cxl/devices/memX/pmem/size Date: December, 2020 KernelVersion: v5.12 @@ -38,6 +55,23 @@ Description: Payload in the CXL-2.0 specification. +What: /sys/bus/cxl/devices/memX/pmem/qos_class +Date: May, 2023 +KernelVersion: v6.8 +Contact: linux-cxl@vger.kernel.org +Description: + (RO) For CXL host platforms that support "QoS Telemetry" + this attribute conveys a comma delimited list of platform + specific cookies that identifies a QoS performance class + for the persistent partition of the CXL mem device. These + class-ids can be compared against a similar "qos_class" + published for a root decoder. 
While it is not required + that the endpoints map their local memory-class to a + matching platform class, mismatches are not recommended + and there are platform specific performance related + side-effects that may result. First class-id is displayed. + + What: /sys/bus/cxl/devices/memX/serial Date: January, 2022 KernelVersion: v5.18 diff --git a/Documentation/admin-guide/cifs/todo.rst b/Documentation/admin-guide/cifs/todo.rst index 2646ed2e2d3e32751d3eacfc89a87b73bd2d79e4..9a65c670774ee822135c6206edd7a7c1f59c723d 100644 --- a/Documentation/admin-guide/cifs/todo.rst +++ b/Documentation/admin-guide/cifs/todo.rst @@ -2,7 +2,8 @@ TODO ==== -Version 2.14 December 21, 2018 +As of 6.7 kernel. See https://wiki.samba.org/index.php/LinuxCIFSKernel +for list of features added by release A Partial List of Missing Features ================================== @@ -12,22 +13,22 @@ for visible, important contributions to this module. Here is a partial list of the known problems and missing features: a) SMB3 (and SMB3.1.1) missing optional features: + multichannel performance optimizations, algorithmic channel selection, + directory leases optimizations, + support for faster packet signing (GMAC), + support for compression over the network, + T10 copy offload ie "ODX" (copy chunk, and "Duplicate Extents" ioctl + are currently the only two server side copy mechanisms supported) - - multichannel (partially integrated), integration of multichannel with RDMA - - directory leases (improved metadata caching). 
Currently only implemented for root dir - - T10 copy offload ie "ODX" (copy chunk, and "Duplicate Extents" ioctl - currently the only two server side copy mechanisms supported) +b) Better optimized compounding and error handling for sparse file support, + perhaps addition of new optional SMB3.1.1 fsctls to make collapse range + and insert range more atomic -b) improved sparse file support (fiemap and SEEK_HOLE are implemented - but additional features would be supportable by the protocol such - as FALLOC_FL_COLLAPSE_RANGE and FALLOC_FL_INSERT_RANGE) - -c) Directory entry caching relies on a 1 second timer, rather than - using Directory Leases, currently only the root file handle is cached longer - by leveraging Directory Leases +c) Support for SMB3.1.1 over QUIC (and perhaps other socket based protocols + like SCTP) d) quota support (needs minor kernel change since quota calls otherwise - won't make it to network filesystems or deviceless filesystems). + won't make it to network filesystems or deviceless filesystems). e) Additional use cases can be optimized to use "compounding" (e.g. open/query/close and open/setinfo/close) to reduce the number of @@ -92,23 +93,20 @@ t) split cifs and smb3 support into separate modules so legacy (and less v) Additional testing of POSIX Extensions for SMB3.1.1 -w) Add support for additional strong encryption types, and additional spnego - authentication mechanisms (see MS-SMB2). GCM-256 is now partially implemented. +w) Support for the Mac SMB3.1.1 extensions to improve interop with Apple servers + +x) Support for additional authentication options (e.g. IAKERB, peer-to-peer + Kerberos, SCRAM and others supported by existing servers) -x) Finish support for SMB3.1.1 compression +y) Improved tracing, more eBPF trace points, better scripts for performance + analysis Known Bugs ========== See https://bugzilla.samba.org - search on product "CifsVFS" for current bug list. 
Also check http://bugzilla.kernel.org (Product = File System, Component = CIFS) - -1) existing symbolic links (Windows reparse points) are recognized but - can not be created remotely. They are implemented for Samba and those that - support the CIFS Unix extensions, although earlier versions of Samba - overly restrict the pathnames. -2) follow_link and readdir code does not follow dfs junctions - but recognizes them +and xfstest results e.g. https://wiki.samba.org/index.php/Xfstest-results-smb3 Misc testing to do ================== diff --git a/Documentation/admin-guide/cifs/usage.rst b/Documentation/admin-guide/cifs/usage.rst index 5f936b4b601881d3e031b71a1cc258c5ddb9585e..aa8290a29dc88b0efdf56500ea40b1a97ddccc9c 100644 --- a/Documentation/admin-guide/cifs/usage.rst +++ b/Documentation/admin-guide/cifs/usage.rst @@ -81,7 +81,7 @@ much older and less secure than the default dialect SMB3 which includes many advanced security features such as downgrade attack detection and encrypted shares and stronger signing and authentication algorithms. There are additional mount options that may be helpful for SMB3 to get -improved POSIX behavior (NB: can use vers=3.0 to force only SMB3, never 2.1): +improved POSIX behavior (NB: can use vers=3 to force SMB3 or later, never 2.1): ``mfsymlinks`` and either ``cifsacl`` or ``modefromsid`` (usually with ``idsfromsid``) @@ -715,6 +715,7 @@ DebugData Displays information about active CIFS sessions and Stats Lists summary resource usage information as well as per share statistics. open_files List all the open file handles on all active SMB sessions. 
+mount_params List of all mount parameters available for the module ======================= ======================================================= Configuration pseudo-files: @@ -864,6 +865,11 @@ i.e.:: echo "value" > /sys/module/cifs/parameters/ +More detailed descriptions of the available module parameters and their values +can be seen by doing: + + modinfo cifs (or modinfo smb3) + ================= ========================================================== 1. enable_oplocks Enable or disable oplocks. Oplocks are enabled by default. [Y/y/1]. To disable use any of [N/n/0]. diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 33ecb6b76ea5977539c1fa5468673045f6489bad..da1dc271d5cca4b56d4c06924c67c0d33dfb5bbf 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -888,9 +888,9 @@ memory region [offset, offset + size] for that kernel image. If '@offset' is omitted, then a suitable offset is selected automatically. - [KNL, X86-64, ARM64, RISCV] Select a region under 4G first, and - fall back to reserve region above 4G when '@offset' - hasn't been specified. + [KNL, X86-64, ARM64, RISCV, LoongArch] Select a region + under 4G first, and fall back to reserve region above + 4G when '@offset' hasn't been specified. See Documentation/admin-guide/kdump/kdump.rst for further details. crashkernel=range1:size1[,range2:size2,...][@offset] @@ -901,25 +901,27 @@ Documentation/admin-guide/kdump/kdump.rst for an example. crashkernel=size[KMG],high - [KNL, X86-64, ARM64, RISCV] range could be above 4G. + [KNL, X86-64, ARM64, RISCV, LoongArch] range could be + above 4G. Allow kernel to allocate physical memory region from top, so could be above 4G if system have more than 4G ram installed. Otherwise memory region will be allocated below 4G, if available. It will be ignored if crashkernel=X is specified. 
crashkernel=size[KMG],low - [KNL, X86-64, ARM64, RISCV] range under 4G. When crashkernel=X,high - is passed, kernel could allocate physical memory region - above 4G, that cause second kernel crash on system - that require some amount of low memory, e.g. swiotlb - requires at least 64M+32K low memory, also enough extra - low memory is needed to make sure DMA buffers for 32-bit - devices won't run out. Kernel would try to allocate + [KNL, X86-64, ARM64, RISCV, LoongArch] range under 4G. + When crashkernel=X,high is passed, kernel could allocate + physical memory region above 4G, that cause second kernel + crash on system that require some amount of low memory, + e.g. swiotlb requires at least 64M+32K low memory, also + enough extra low memory is needed to make sure DMA buffers + for 32-bit devices won't run out. Kernel would try to allocate default size of memory below 4G automatically. The default size is platform dependent. --> x86: max(swiotlb_size_or_default() + 8MiB, 256MiB) --> arm64: 128MiB --> riscv: 128MiB + --> loongarch: 128MiB This one lets the user specify own low range under 4G for second kernel instead. 0: to disable low allocation. diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst index bfdf236e2af3d4ca5dc3e6330ea6737704d43329..e8c2ce1f9df68df5976b7cc536d3f48c0501ba4b 100644 --- a/Documentation/arch/arm64/silicon-errata.rst +++ b/Documentation/arch/arm64/silicon-errata.rst @@ -71,6 +71,8 @@ stable kernels. 
+----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A510 | #2658417 | ARM64_ERRATUM_2658417 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A510 | #3117295 | ARM64_ERRATUM_3117295 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A520 | #2966298 | ARM64_ERRATUM_2966298 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A53 | #826319 | ARM64_ERRATUM_826319 | @@ -235,11 +237,9 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | Rockchip | RK3588 | #3588001 | ROCKCHIP_ERRATUM_3588001 | +----------------+-----------------+-----------------+-----------------------------+ - +----------------+-----------------+-----------------+-----------------------------+ | Fujitsu | A64FX | E#010001 | FUJITSU_ERRATUM_010001 | +----------------+-----------------+-----------------+-----------------------------+ - +----------------+-----------------+-----------------+-----------------------------+ | ASR | ASR8601 | #8601001 | N/A | +----------------+-----------------+-----------------+-----------------------------+ diff --git a/Documentation/arch/x86/tdx.rst b/Documentation/arch/x86/tdx.rst index dc8d9fd2c3f76cbb12b85229cba2101671d56cf4..719043cd8b46999301d32e77060b8340923a476d 100644 --- a/Documentation/arch/x86/tdx.rst +++ b/Documentation/arch/x86/tdx.rst @@ -10,6 +10,191 @@ encrypting the guest memory. In TDX, a special module running in a special mode sits between the host and the guest and manages the guest/host separation. +TDX Host Kernel Support +======================= + +TDX introduces a new CPU mode called Secure Arbitration Mode (SEAM) and +a new isolated range pointed by the SEAM Range Register (SEAMRR). 
A +CPU-attested software module called 'the TDX module' runs inside the new +isolated range to provide the functionalities to manage and run protected +VMs. + +TDX also leverages Intel Multi-Key Total Memory Encryption (MKTME) to +provide crypto-protection to the VMs. TDX reserves part of MKTME KeyIDs +as TDX private KeyIDs, which are only accessible within the SEAM mode. +BIOS is responsible for partitioning legacy MKTME KeyIDs and TDX KeyIDs. + +Before the TDX module can be used to create and run protected VMs, it +must be loaded into the isolated range and properly initialized. The TDX +architecture doesn't require the BIOS to load the TDX module, but the +kernel assumes it is loaded by the BIOS. + +TDX boot-time detection +----------------------- + +The kernel detects TDX by detecting TDX private KeyIDs during kernel +boot. Below dmesg shows when TDX is enabled by BIOS:: + + [..] virt/tdx: BIOS enabled: private KeyID range: [16, 64) + +TDX module initialization +--------------------------------------- + +The kernel talks to the TDX module via the new SEAMCALL instruction. The +TDX module implements SEAMCALL leaf functions to allow the kernel to +initialize it. + +If the TDX module isn't loaded, the SEAMCALL instruction fails with a +special error. In this case the kernel fails the module initialization +and reports the module isn't loaded:: + + [..] virt/tdx: module not loaded + +Initializing the TDX module consumes roughly 1/256th of the system RAM size to +use it as 'metadata' for the TDX memory. It also takes additional CPU +time to initialize those metadata along with the TDX module itself. Both +are not trivial. The kernel initializes the TDX module at runtime on +demand. + +Besides initializing the TDX module, a per-cpu initialization SEAMCALL +must be done on one cpu before any other SEAMCALLs can be made on that +cpu. 
+ +The kernel provides two functions, tdx_enable() and tdx_cpu_enable() to +allow the user of TDX to enable the TDX module and enable TDX on local +cpu respectively. + +Making SEAMCALL requires VMXON has been done on that CPU. Currently only +KVM implements VMXON. For now both tdx_enable() and tdx_cpu_enable() +don't do VMXON internally (not trivial), but depend on the caller to +guarantee that. + +To enable TDX, the caller of TDX should: 1) temporarily disable CPU +hotplug; 2) do VMXON and tdx_cpu_enable() on all online cpus; 3) call +tdx_enable(). For example:: + + cpus_read_lock(); + on_each_cpu(vmxon_and_tdx_cpu_enable()); + ret = tdx_enable(); + cpus_read_unlock(); + if (ret) + goto no_tdx; + // TDX is ready to use + +And the caller of TDX must guarantee the tdx_cpu_enable() has been +successfully done on any cpu before it wants to run any other SEAMCALL. +A typical usage is to do both VMXON and tdx_cpu_enable() in CPU hotplug +online callback, and refuse to online if tdx_cpu_enable() fails. + +User can consult dmesg to see whether the TDX module has been initialized. + +If the TDX module is initialized successfully, dmesg shows something +like below:: + + [..] virt/tdx: 262668 KBs allocated for PAMT + [..] virt/tdx: module initialized + +If the TDX module failed to initialize, dmesg also shows it failed to +initialize:: + + [..] virt/tdx: module initialization failed ... + +TDX Interaction to Other Kernel Components +------------------------------------------ + +TDX Memory Policy +~~~~~~~~~~~~~~~~~ + +TDX reports a list of "Convertible Memory Region" (CMR) to tell the +kernel which memory is TDX compatible. The kernel needs to build a list +of memory regions (out of CMRs) as "TDX-usable" memory and pass those +regions to the TDX module. Once this is done, those "TDX-usable" memory +regions are fixed during module's lifetime. + +To keep things simple, currently the kernel simply guarantees all pages +in the page allocator are TDX memory. 
Specifically, the kernel uses all +system memory in the core-mm "at the time of TDX module initialization" +as TDX memory, and in the meantime, refuses to online any non-TDX-memory +in the memory hotplug. + +Physical Memory Hotplug +~~~~~~~~~~~~~~~~~~~~~~~ + +Note TDX assumes convertible memory is always physically present during +machine's runtime. A non-buggy BIOS should never support hot-removal of +any convertible memory. This implementation doesn't handle ACPI memory +removal but depends on the BIOS to behave correctly. + +CPU Hotplug +~~~~~~~~~~~ + +TDX module requires the per-cpu initialization SEAMCALL must be done on +one cpu before any other SEAMCALLs can be made on that cpu. The kernel +provides tdx_cpu_enable() to let the user of TDX to do it when the user +wants to use a new cpu for TDX task. + +TDX doesn't support physical (ACPI) CPU hotplug. During machine boot, +TDX verifies all boot-time present logical CPUs are TDX compatible before +enabling TDX. A non-buggy BIOS should never support hot-add/removal of +physical CPU. Currently the kernel doesn't handle physical CPU hotplug, +but depends on the BIOS to behave correctly. + +Note TDX works with CPU logical online/offline, thus the kernel still +allows to offline logical CPU and online it again. + +Kexec() +~~~~~~~ + +TDX host support currently lacks the ability to handle kexec. For +simplicity only one of them can be enabled in the Kconfig. This will be +fixed in the future. + +Erratum +~~~~~~~ + +The first few generations of TDX hardware have an erratum. A partial +write to a TDX private memory cacheline will silently "poison" the +line. Subsequent reads will consume the poison and generate a machine +check. + +A partial write is a memory write where a write transaction of less than +cacheline lands at the memory controller. The CPU does these via +non-temporal write instructions (like MOVNTI), or through UC/WC memory +mappings. Devices can also do partial writes via DMA. 
+ +Theoretically, a kernel bug could do partial write to TDX private memory +and trigger unexpected machine check. What's more, the machine check +code will present these as "Hardware error" when they were, in fact, a +software-triggered issue. But in the end, this issue is hard to trigger. + +If the platform has such erratum, the kernel prints additional message in +machine check handler to tell user the machine check may be caused by +kernel bug on TDX private memory. + +Interaction vs S3 and deeper states +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +TDX cannot survive from S3 and deeper states. The hardware resets and +disables TDX completely when platform goes to S3 and deeper. Both TDX +guests and the TDX module get destroyed permanently. + +The kernel uses S3 for suspend-to-ram, and uses S4 and deeper states for +hibernation. Currently, for simplicity, the kernel chooses to make TDX +mutually exclusive with S3 and hibernation. + +The kernel disables TDX during early boot when hibernation support is +available:: + + [..] virt/tdx: initialization failed: Hibernation support is enabled + +Add 'nohibernate' kernel command line to disable hibernation in order to +use TDX. + +ACPI S3 is disabled during kernel early boot if TDX is enabled. The user +needs to turn off TDX in the BIOS in order to use S3. + +TDX Guest Support +================= Since the host cannot directly access guest registers or memory, much normal functionality of a hypervisor must be moved into the guest. This is implemented using a Virtualization Exception (#VE) that is handled by the @@ -20,7 +205,7 @@ TDX includes new hypercall-like mechanisms for communicating from the guest to the hypervisor or the TDX module. New TDX Exceptions -================== +------------------ TDX guests behave differently from bare-metal and traditional VMX guests. In TDX guests, otherwise normal instructions or memory accesses can cause @@ -30,7 +215,7 @@ Instructions marked with an '*' conditionally cause exceptions. 
The details for these instructions are discussed below. Instruction-based #VE ---------------------- +~~~~~~~~~~~~~~~~~~~~~ - Port I/O (INS, OUTS, IN, OUT) - HLT @@ -41,7 +226,7 @@ Instruction-based #VE - CPUID* Instruction-based #GP ---------------------- +~~~~~~~~~~~~~~~~~~~~~ - All VMX instructions: INVEPT, INVVPID, VMCLEAR, VMFUNC, VMLAUNCH, VMPTRLD, VMPTRST, VMREAD, VMRESUME, VMWRITE, VMXOFF, VMXON @@ -52,7 +237,7 @@ Instruction-based #GP - RDMSR*,WRMSR* RDMSR/WRMSR Behavior --------------------- +~~~~~~~~~~~~~~~~~~~~ MSR access behavior falls into three categories: @@ -73,7 +258,7 @@ trapping and handling in the TDX module. Other than possibly being slow, these MSRs appear to function just as they would on bare metal. CPUID Behavior --------------- +~~~~~~~~~~~~~~ For some CPUID leaves and sub-leaves, the virtualized bit fields of CPUID return values (in guest EAX/EBX/ECX/EDX) are configurable by the @@ -93,7 +278,7 @@ not know how to handle. The guest kernel may ask the hypervisor for the value with a hypercall. #VE on Memory Accesses -====================== +---------------------- There are essentially two classes of TDX memory: private and shared. Private memory receives full TDX protections. Its content is protected @@ -107,7 +292,7 @@ entries. This helps ensure that a guest does not place sensitive information in shared memory, exposing it to the untrusted hypervisor. #VE on Shared Memory --------------------- +~~~~~~~~~~~~~~~~~~~~ Access to shared mappings can cause a #VE. The hypervisor ultimately controls whether a shared memory access causes a #VE, so the guest must be @@ -127,7 +312,7 @@ be careful not to access device MMIO regions unless it is also prepared to handle a #VE. #VE on Private Pages --------------------- +~~~~~~~~~~~~~~~~~~~~ An access to private mappings can also cause a #VE. 
Since all kernel memory is also private memory, the kernel might theoretically need to @@ -145,7 +330,7 @@ The hypervisor is permitted to unilaterally move accepted pages to a to handle the exception. Linux #VE handler -================= +----------------- Just like page faults or #GP's, #VE exceptions can be either handled or be fatal. Typically, an unhandled userspace #VE results in a SIGSEGV. @@ -167,7 +352,7 @@ While the block is in place, any #VE is elevated to a double fault (#DF) which is not recoverable. MMIO handling -============= +------------- In non-TDX VMs, MMIO is usually implemented by giving a guest access to a mapping which will cause a VMEXIT on access, and then the hypervisor @@ -189,7 +374,7 @@ MMIO access via other means (like structure overlays) may result in an oops. Shared Memory Conversions -========================= +------------------------- All TDX guest memory starts out as private at boot. This memory can not be accessed by the hypervisor. However, some kernel users like device diff --git a/Documentation/block/ioprio.rst b/Documentation/block/ioprio.rst index a25c6d5df87b20ff2149353adcf854b1b7965f80..4662e1ff3d81f28f57417ff70ac81f84220df88f 100644 --- a/Documentation/block/ioprio.rst +++ b/Documentation/block/ioprio.rst @@ -6,17 +6,16 @@ Block io priorities Intro ----- -With the introduction of cfq v3 (aka cfq-ts or time sliced cfq), basic io -priorities are supported for reads on files. This enables users to io nice -processes or process groups, similar to what has been possible with cpu -scheduling for ages. This document mainly details the current possibilities -with cfq; other io schedulers do not support io priorities thus far. +The io priority feature enables users to io nice processes or process groups, +similar to what has been possible with cpu scheduling for ages. Support for io +priorities is io scheduler dependent and currently supported by bfq and +mq-deadline. 
Scheduling classes ------------------ -CFQ implements three generic scheduling classes that determine how io is -served for a process. +Three generic scheduling classes are implemented for io priorities that +determine how io is served for a process. IOPRIO_CLASS_RT: This is the realtime io class. This scheduling class is given higher priority than any other in the system, processes from this class are diff --git a/Documentation/dev-tools/checkuapi.rst b/Documentation/dev-tools/checkuapi.rst new file mode 100644 index 0000000000000000000000000000000000000000..9072f21b50b0c0a980a948155afae59b95a0c2d3 --- /dev/null +++ b/Documentation/dev-tools/checkuapi.rst @@ -0,0 +1,477 @@ +.. SPDX-License-Identifier: GPL-2.0-only + +============ +UAPI Checker +============ + +The UAPI checker (``scripts/check-uapi.sh``) is a shell script which +checks UAPI header files for userspace backwards-compatibility across +the git tree. + +Options +======= + +This section will describe the options with which ``check-uapi.sh`` +can be run. + +Usage:: + + check-uapi.sh [-b BASE_REF] [-p PAST_REF] [-j N] [-l ERROR_LOG] [-i] [-q] [-v] + +Available options:: + + -b BASE_REF Base git reference to use for comparison. If unspecified or empty, + will use any dirty changes in tree to UAPI files. If there are no + dirty changes, HEAD will be used. + -p PAST_REF Compare BASE_REF to PAST_REF (e.g. -p v6.1). If unspecified or empty, + will use BASE_REF^1. Must be an ancestor of BASE_REF. Only headers + that exist on PAST_REF will be checked for compatibility. + -j JOBS Number of checks to run in parallel (default: number of CPU cores). + -l ERROR_LOG Write error log to file (default: no error log is generated). + -i Ignore ambiguous changes that may or may not break UAPI compatibility. + -q Quiet operation. + -v Verbose operation (print more information about each header being checked). 
+ +Environmental args:: + + ABIDIFF Custom path to abidiff binary + CC C compiler (default is "gcc") + ARCH Target architecture of C compiler (default is host arch) + +Exit codes:: + + 0) Success + 1) ABI difference detected + 2) Prerequisite not met + +Examples +======== + +Basic Usage +----------- + +First, let's try making a change to a UAPI header file that obviously +won't break userspace:: + + cat << 'EOF' | patch -l -p1 + --- a/include/uapi/linux/acct.h + +++ b/include/uapi/linux/acct.h + @@ -21,7 +21,9 @@ + #include + #include + + -/* + +#define FOO + + + +/* + * comp_t is a 16-bit "floating" point number with a 3-bit base 8 + * exponent and a 13-bit fraction. + * comp2_t is 24-bit with 5-bit base 2 exponent and 20 bit fraction + diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h + EOF + +Now, let's use the script to validate:: + + % ./scripts/check-uapi.sh + Installing user-facing UAPI headers from dirty tree... OK + Installing user-facing UAPI headers from HEAD... OK + Checking changes to UAPI headers between HEAD and dirty tree... + All 912 UAPI headers compatible with x86 appear to be backwards compatible + +Let's add another change that *might* break userspace:: + + cat << 'EOF' | patch -l -p1 + --- a/include/uapi/linux/bpf.h + +++ b/include/uapi/linux/bpf.h + @@ -74,7 +74,7 @@ struct bpf_insn { + __u8 dst_reg:4; /* dest register */ + __u8 src_reg:4; /* source register */ + __s16 off; /* signed offset */ + - __s32 imm; /* signed immediate constant */ + + __u32 imm; /* unsigned immediate constant */ + }; + + /* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */ + EOF + +The script will catch this:: + + % ./scripts/check-uapi.sh + Installing user-facing UAPI headers from dirty tree... OK + Installing user-facing UAPI headers from HEAD... OK + Checking changes to UAPI headers between HEAD and dirty tree... 
+ ==== ABI differences detected in include/linux/bpf.h from HEAD -> dirty tree ==== + [C] 'struct bpf_insn' changed: + type size hasn't changed + 1 data member change: + type of '__s32 imm' changed: + typedef name changed from __s32 to __u32 at int-ll64.h:27:1 + underlying type 'int' changed: + type name changed from 'int' to 'unsigned int' + type size hasn't changed + ================================================================================== + + error - 1/912 UAPI headers compatible with x86 appear _not_ to be backwards compatible + +In this case, the script is reporting the type change because it could +break a userspace program that passes in a negative number. Now, let's +say you know that no userspace program could possibly be using a negative +value in ``imm``, so changing to an unsigned type there shouldn't hurt +anything. You can pass the ``-i`` flag to the script to ignore changes +in which the userspace backwards compatibility is ambiguous:: + + % ./scripts/check-uapi.sh -i + Installing user-facing UAPI headers from dirty tree... OK + Installing user-facing UAPI headers from HEAD... OK + Checking changes to UAPI headers between HEAD and dirty tree... + All 912 UAPI headers compatible with x86 appear to be backwards compatible + +Now, let's make a similar change that *will* break userspace:: + + cat << 'EOF' | patch -l -p1 + --- a/include/uapi/linux/bpf.h + +++ b/include/uapi/linux/bpf.h + @@ -71,8 +71,8 @@ enum { + + struct bpf_insn { + __u8 code; /* opcode */ + - __u8 dst_reg:4; /* dest register */ + __u8 src_reg:4; /* source register */ + + __u8 dst_reg:4; /* dest register */ + __s16 off; /* signed offset */ + __s32 imm; /* signed immediate constant */ + }; + EOF + +Since we're re-ordering an existing struct member, there's no ambiguity, +and the script will report the breakage even if you pass ``-i``:: + + % ./scripts/check-uapi.sh -i + Installing user-facing UAPI headers from dirty tree... OK + Installing user-facing UAPI headers from HEAD... 
OK + Checking changes to UAPI headers between HEAD and dirty tree... + ==== ABI differences detected in include/linux/bpf.h from HEAD -> dirty tree ==== + [C] 'struct bpf_insn' changed: + type size hasn't changed + 2 data member changes: + '__u8 dst_reg' offset changed from 8 to 12 (in bits) (by +4 bits) + '__u8 src_reg' offset changed from 12 to 8 (in bits) (by -4 bits) + ================================================================================== + + error - 1/912 UAPI headers compatible with x86 appear _not_ to be backwards compatible + +Let's commit the breaking change, then commit the innocuous change:: + + % git commit -m 'Breaking UAPI change' include/uapi/linux/bpf.h + [detached HEAD f758e574663a] Breaking UAPI change + 1 file changed, 1 insertion(+), 1 deletion(-) + % git commit -m 'Innocuous UAPI change' include/uapi/linux/acct.h + [detached HEAD 2e87df769081] Innocuous UAPI change + 1 file changed, 3 insertions(+), 1 deletion(-) + +Now, let's run the script again with no arguments:: + + % ./scripts/check-uapi.sh + Installing user-facing UAPI headers from HEAD... OK + Installing user-facing UAPI headers from HEAD^1... OK + Checking changes to UAPI headers between HEAD^1 and HEAD... + All 912 UAPI headers compatible with x86 appear to be backwards compatible + +It doesn't catch any breaking change because, by default, it only +compares ``HEAD`` to ``HEAD^1``. The breaking change was committed on +``HEAD~2``. If we wanted the search scope to go back further, we'd have to +use the ``-p`` option to pass a different past reference. In this case, +let's pass ``-p HEAD~2`` to the script so it checks UAPI changes between +``HEAD~2`` and ``HEAD``:: + + % ./scripts/check-uapi.sh -p HEAD~2 + Installing user-facing UAPI headers from HEAD... OK + Installing user-facing UAPI headers from HEAD~2... OK + Checking changes to UAPI headers between HEAD~2 and HEAD... 
+ ==== ABI differences detected in include/linux/bpf.h from HEAD~2 -> HEAD ==== + [C] 'struct bpf_insn' changed: + type size hasn't changed + 2 data member changes: + '__u8 dst_reg' offset changed from 8 to 12 (in bits) (by +4 bits) + '__u8 src_reg' offset changed from 12 to 8 (in bits) (by -4 bits) + ============================================================================== + + error - 1/912 UAPI headers compatible with x86 appear _not_ to be backwards compatible + +Alternatively, we could have also run with ``-b HEAD~``. This would set the +base reference to ``HEAD~`` so then the script would compare it to ``HEAD~^1``. + +Architecture-specific Headers +----------------------------- + +Consider this change:: + + cat << 'EOF' | patch -l -p1 + --- a/arch/arm64/include/uapi/asm/sigcontext.h + +++ b/arch/arm64/include/uapi/asm/sigcontext.h + @@ -70,6 +70,7 @@ struct sigcontext { + struct _aarch64_ctx { + __u32 magic; + __u32 size; + + __u32 new_var; + }; + + #define FPSIMD_MAGIC 0x46508001 + EOF + +This is a change to an arm64-specific UAPI header file. In this example, I'm +running the script from an x86 machine with an x86 compiler, so, by default, +the script only checks x86-compatible UAPI header files:: + + % ./scripts/check-uapi.sh + Installing user-facing UAPI headers from dirty tree... OK + Installing user-facing UAPI headers from HEAD... OK + No changes to UAPI headers were applied between HEAD and dirty tree + +With an x86 compiler, we can't check header files in ``arch/arm64``, so the +script doesn't even try. + +If we want to check the header file, we'll have to use an arm64 compiler and +set ``ARCH`` accordingly:: + + % CC=aarch64-linux-gnu-gcc ARCH=arm64 ./scripts/check-uapi.sh + Installing user-facing UAPI headers from dirty tree... OK + Installing user-facing UAPI headers from HEAD... OK + Checking changes to UAPI headers between HEAD and dirty tree... 
+ ==== ABI differences detected in include/asm/sigcontext.h from HEAD -> dirty tree ==== + [C] 'struct _aarch64_ctx' changed: + type size changed from 64 to 96 (in bits) + 1 data member insertion: + '__u32 new_var', at offset 64 (in bits) at sigcontext.h:73:1 + -- snip -- + [C] 'struct zt_context' changed: + type size changed from 128 to 160 (in bits) + 2 data member changes (1 filtered): + '__u16 nregs' offset changed from 64 to 96 (in bits) (by +32 bits) + '__u16 __reserved[3]' offset changed from 80 to 112 (in bits) (by +32 bits) + ======================================================================================= + + error - 1/884 UAPI headers compatible with arm64 appear _not_ to be backwards compatible + +We can see with ``ARCH`` and ``CC`` set properly for the file, the ABI +change is reported properly. Also notice that the total number of UAPI +header files checked by the script changes. This is because the number +of headers installed for arm64 platforms is different than x86. + +Cross-Dependency Breakages +-------------------------- + +Consider this change:: + + cat << 'EOF' | patch -l -p1 + --- a/include/uapi/linux/types.h + +++ b/include/uapi/linux/types.h + @@ -52,7 +52,7 @@ typedef __u32 __bitwise __wsum; + #define __aligned_be64 __be64 __attribute__((aligned(8))) + #define __aligned_le64 __le64 __attribute__((aligned(8))) + + -typedef unsigned __bitwise __poll_t; + +typedef unsigned short __bitwise __poll_t; + + #endif /* __ASSEMBLY__ */ + #endif /* _UAPI_LINUX_TYPES_H */ + EOF + +Here, we're changing a ``typedef`` in ``types.h``. This doesn't break +a UAPI in ``types.h``, but other UAPIs in the tree may break due to +this change:: + + % ./scripts/check-uapi.sh + Installing user-facing UAPI headers from dirty tree... OK + Installing user-facing UAPI headers from HEAD... OK + Checking changes to UAPI headers between HEAD and dirty tree... 
+ ==== ABI differences detected in include/linux/eventpoll.h from HEAD -> dirty tree ==== + [C] 'struct epoll_event' changed: + type size changed from 96 to 80 (in bits) + 2 data member changes: + type of '__poll_t events' changed: + underlying type 'unsigned int' changed: + type name changed from 'unsigned int' to 'unsigned short int' + type size changed from 32 to 16 (in bits) + '__u64 data' offset changed from 32 to 16 (in bits) (by -16 bits) + ======================================================================================== + include/linux/eventpoll.h did not change between HEAD and dirty tree... + It's possible a change to one of the headers it includes caused this error: + #include <linux/fcntl.h> + #include <linux/types.h> + +Note that the script noticed the failing header file did not change, +so it assumes one of its includes must have caused the breakage. Indeed, +we can see ``linux/types.h`` is used from ``eventpoll.h``. + +UAPI Header Removals +-------------------- + +Consider this change:: + + cat << 'EOF' | patch -l -p1 + diff --git a/include/uapi/asm-generic/Kbuild b/include/uapi/asm-generic/Kbuild + index ebb180aac74e..a9c88b0a8b3b 100644 + --- a/include/uapi/asm-generic/Kbuild + +++ b/include/uapi/asm-generic/Kbuild + @@ -31,6 +31,6 @@ mandatory-y += stat.h + mandatory-y += statfs.h + mandatory-y += swab.h + mandatory-y += termbits.h + -mandatory-y += termios.h + +#mandatory-y += termios.h + mandatory-y += types.h + mandatory-y += unistd.h + EOF + +This change removes a UAPI header file from the install list. Let's run +the script:: + + % ./scripts/check-uapi.sh + Installing user-facing UAPI headers from dirty tree... OK + Installing user-facing UAPI headers from HEAD... OK + Checking changes to UAPI headers between HEAD and dirty tree...
+ ==== UAPI header include/asm/termios.h was removed between HEAD and dirty tree ==== + + error - 1/912 UAPI headers compatible with x86 appear _not_ to be backwards compatible + +Removing a UAPI header is considered a breaking change, and the script +will flag it as such. + +Checking Historic UAPI Compatibility +------------------------------------ + +You can use the ``-b`` and ``-p`` options to examine different chunks of your +git tree. For example, to check all changed UAPI header files between tags +v6.0 and v6.1, you'd run:: + + % ./scripts/check-uapi.sh -b v6.1 -p v6.0 + Installing user-facing UAPI headers from v6.1... OK + Installing user-facing UAPI headers from v6.0... OK + Checking changes to UAPI headers between v6.0 and v6.1... + + --- snip --- + error - 37/907 UAPI headers compatible with x86 appear _not_ to be backwards compatible + +Note: Before v5.3, a header file needed by the script is not present, +so the script is unable to check changes before then. + +You'll notice that the script detected many UAPI changes that are not +backwards compatible. Knowing that kernel UAPIs are supposed to be stable +forever, this is an alarming result. This brings us to the next section: +caveats. + +Caveats +======= + +The UAPI checker makes no assumptions about the author's intention, so some +types of changes may be flagged even though they intentionally break UAPI. + +Removals For Refactoring or Deprecation +--------------------------------------- + +Sometimes drivers for very old hardware are removed, such as in this example:: + + % ./scripts/check-uapi.sh -b ba47652ba655 + Installing user-facing UAPI headers from ba47652ba655... OK + Installing user-facing UAPI headers from ba47652ba655^1... OK + Checking changes to UAPI headers between ba47652ba655^1 and ba47652ba655... 
+ ==== UAPI header include/linux/meye.h was removed between ba47652ba655^1 and ba47652ba655 ==== + + error - 1/910 UAPI headers compatible with x86 appear _not_ to be backwards compatible + +The script will always flag removals (even if they're intentional). + +Struct Expansions +----------------- + +Depending on how a structure is handled in kernelspace, a change which +expands a struct could be non-breaking. + +If a struct is used as the argument to an ioctl, then the kernel driver +must be able to handle ioctl commands of any size. Beyond that, you need +to be careful when copying data from the user. Say, for example, that +``struct foo`` is changed like this:: + + struct foo { + __u64 a; /* added in version 1 */ + + __u32 b; /* added in version 2 */ + + __u32 c; /* added in version 2 */ + }; + +By default, the script will flag this kind of change for further review:: + + [C] 'struct foo' changed: + type size changed from 64 to 128 (in bits) + 2 data member insertions: + '__u32 b', at offset 64 (in bits) + '__u32 c', at offset 96 (in bits) + +However, it is possible that this change was made safely. + +If a userspace program was built with version 1, it will think +``sizeof(struct foo)`` is 8. That size will be encoded in the +ioctl value that gets sent to the kernel. If the kernel is built +with version 2, it will think the ``sizeof(struct foo)`` is 16. + +The kernel can use the ``_IOC_SIZE`` macro to get the size encoded +in the ioctl code that the user passed in and then use +``copy_struct_from_user()`` to safely copy the value:: + + int handle_ioctl(unsigned long cmd, unsigned long arg) + { + switch (_IOC_NR(cmd)) { + case 0x01: { + struct foo my_cmd; /* size 16 in the kernel */ + + ret = copy_struct_from_user(&my_cmd, arg, sizeof(struct foo), _IOC_SIZE(cmd)); + ... + +``copy_struct_from_user`` will zero the struct in the kernel and then copy +only the bytes passed in from the user (leaving new members zeroized).
+If the user passed in a larger struct, the extra members are ignored. + +If you know this situation is accounted for in the kernel code, you can +pass ``-i`` to the script, and struct expansions like this will be ignored. + +Flex Array Migration +-------------------- + +While the script handles expansion into an existing flex array, it does +still flag initial migration to flex arrays from 1-element fake flex +arrays. For example:: + + struct foo { + __u32 x; + - __u32 flex[1]; /* fake flex */ + + __u32 flex[]; /* real flex */ + }; + +This change would be flagged by the script:: + + [C] 'struct foo' changed: + type size changed from 64 to 32 (in bits) + 1 data member change: + type of '__u32 flex[1]' changed: + type name changed from '__u32[1]' to '__u32[]' + array type size changed from 32 to 'unknown' + array type subrange 1 changed length from 1 to 'unknown' + +At this time, there's no way to filter these types of changes, so be +aware of this possible false positive. + +Summary +------- + +While many types of false positives are filtered out by the script, +it's possible there are some cases where the script flags a change +which does not break UAPI. It's also possible a change which *does* +break userspace would not be flagged by this script. While the script +has been run on much of the kernel history, there could still be corner +cases that are not accounted for. + +The intention is for this script to be used as a quick check for +maintainers or automated tooling, not as the end-all authority on +patch compatibility. It's best to remember: use your best judgment +(and ideally a unit test in userspace) to make sure your UAPI changes +are backwards-compatible! 
diff --git a/Documentation/dev-tools/index.rst b/Documentation/dev-tools/index.rst index 3d2286c683bc99575b18746ee5529d5d64dceff5..efa49cdc8e2eb3cd7f104fc90c5c0e4ff4e9ad3b 100644 --- a/Documentation/dev-tools/index.rst +++ b/Documentation/dev-tools/index.rst @@ -31,6 +31,7 @@ Documentation/dev-tools/testing-overview.rst kselftest kunit/index ktap + checkuapi .. only:: subproject and html diff --git a/Documentation/devicetree/bindings/dma/dma-controller.yaml b/Documentation/devicetree/bindings/dma/dma-controller.yaml index 04d150d4d15d3cc74958c562ceaf921dc4bb24b0..e6afca558c2dfa4d84f5e821f519c1ad9dfa7d39 100644 --- a/Documentation/devicetree/bindings/dma/dma-controller.yaml +++ b/Documentation/devicetree/bindings/dma/dma-controller.yaml @@ -19,19 +19,4 @@ properties: additionalProperties: true -examples: - - | - dma: dma-controller@48000000 { - compatible = "ti,omap-sdma"; - reg = <0x48000000 0x1000>; - interrupts = <0 12 0x4>, - <0 13 0x4>, - <0 14 0x4>, - <0 15 0x4>; - #dma-cells = <1>; - dma-channels = <32>; - dma-requests = <127>; - dma-channel-mask = <0xfffe>; - }; - ... diff --git a/Documentation/devicetree/bindings/dma/dma-router.yaml b/Documentation/devicetree/bindings/dma/dma-router.yaml index 346fe0fa4460e316223d80ed0ffbd890dfd65450..5ad2febc581e23a72d862b6e22c379bbe13bbaec 100644 --- a/Documentation/devicetree/bindings/dma/dma-router.yaml +++ b/Documentation/devicetree/bindings/dma/dma-router.yaml @@ -40,15 +40,4 @@ required: additionalProperties: true -examples: - - | - sdma_xbar: dma-router@4a002b78 { - compatible = "ti,dra7-dma-crossbar"; - reg = <0x4a002b78 0xfc>; - #dma-cells = <1>; - dma-requests = <205>; - ti,dma-safe-map = <0>; - dma-masters = <&sdma>; - }; - ... 
diff --git a/Documentation/devicetree/bindings/dma/loongson,ls2x-apbdma.yaml b/Documentation/devicetree/bindings/dma/loongson,ls2x-apbdma.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6a1b49a49a646b9964e071981f9bb776dcb664d3 --- /dev/null +++ b/Documentation/devicetree/bindings/dma/loongson,ls2x-apbdma.yaml @@ -0,0 +1,62 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/dma/loongson,ls2x-apbdma.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Loongson LS2X APB DMA controller + +description: + The Loongson LS2X APB DMA controller is used for transferring data + between system memory and the peripherals on the APB bus. + +maintainers: + - Binbin Zhou + +allOf: + - $ref: dma-controller.yaml# + +properties: + compatible: + oneOf: + - const: loongson,ls2k1000-apbdma + - items: + - const: loongson,ls2k0500-apbdma + - const: loongson,ls2k1000-apbdma + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clocks: + maxItems: 1 + + '#dma-cells': + const: 1 + +required: + - compatible + - reg + - interrupts + - clocks + - '#dma-cells' + +additionalProperties: false + +examples: + - | + #include <dt-bindings/interrupt-controller/irq.h> + #include <dt-bindings/clock/loongson,ls2k-clk.h> + + dma-controller@1fe00c00 { + compatible = "loongson,ls2k1000-apbdma"; + reg = <0x1fe00c00 0x8>; + interrupt-parent = <&liointc1>; + interrupts = <12 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk LOONGSON2_APB_CLK>; + #dma-cells = <1>; + }; + +... diff --git a/Documentation/devicetree/bindings/dma/nvidia,tegra210-adma.yaml b/Documentation/devicetree/bindings/dma/nvidia,tegra210-adma.yaml index 4003dbe94940c2150fc6105f9c18d5bd914aa39b..877147e95ecc5df1a34893ac88e0d83d70418347 100644 --- a/Documentation/devicetree/bindings/dma/nvidia,tegra210-adma.yaml +++ b/Documentation/devicetree/bindings/dma/nvidia,tegra210-adma.yaml @@ -53,6 +53,9 @@ properties: ADMA_CHn_CTRL register.
const: 1 + dma-channel-mask: + maxItems: 1 + required: - compatible - reg diff --git a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml index 88d0de3d1b46b8a9ff56613bd66dd3bf86034e0e..deb64cb9ca3eacf092b1f92a14407092689212d3 100644 --- a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml +++ b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml @@ -32,6 +32,8 @@ properties: - qcom,sm8350-gpi-dma - qcom,sm8450-gpi-dma - qcom,sm8550-gpi-dma + - qcom,sm8650-gpi-dma + - qcom,x1e80100-gpi-dma - const: qcom,sm6350-gpi-dma - items: - enum: diff --git a/Documentation/devicetree/bindings/dma/renesas,rz-dmac.yaml b/Documentation/devicetree/bindings/dma/renesas,rz-dmac.yaml index c284abc6784aec5439ba43f3fb3a8eab66fcbc16..a42b6a26a6d3f25874186faad8ce91995857f1a2 100644 --- a/Documentation/devicetree/bindings/dma/renesas,rz-dmac.yaml +++ b/Documentation/devicetree/bindings/dma/renesas,rz-dmac.yaml @@ -16,7 +16,7 @@ properties: compatible: items: - enum: - - renesas,r9a07g043-dmac # RZ/G2UL + - renesas,r9a07g043-dmac # RZ/G2UL and RZ/Five - renesas,r9a07g044-dmac # RZ/G2{L,LC} - renesas,r9a07g054-dmac # RZ/V2L - const: renesas,rz-dmac diff --git a/Documentation/devicetree/bindings/dma/sifive,fu540-c000-pdma.yaml b/Documentation/devicetree/bindings/dma/sifive,fu540-c000-pdma.yaml index a1af0b9063653741f4bd6b6501339aea34cba13f..3b22183a1a379258f3c8c826dbc6597d1dc3b3a9 100644 --- a/Documentation/devicetree/bindings/dma/sifive,fu540-c000-pdma.yaml +++ b/Documentation/devicetree/bindings/dma/sifive,fu540-c000-pdma.yaml @@ -29,6 +29,7 @@ properties: compatible: items: - enum: + - microchip,mpfs-pdma - sifive,fu540-c000-pdma - const: sifive,pdma0 description: diff --git a/Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml b/Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml index 4ca300a42a99c2f60184318d9b7e8d5906872e44..27b8e163656006b311264c242ff2aaa790c30ebb 100644 --- 
a/Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml +++ b/Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml @@ -37,11 +37,11 @@ properties: reg: minItems: 3 - maxItems: 5 + maxItems: 9 reg-names: minItems: 3 - maxItems: 5 + maxItems: 9 "#dma-cells": const: 3 @@ -141,7 +141,10 @@ allOf: ti,sci-rm-range-tchan: false reg: - maxItems: 3 + items: + - description: BCDMA Control /Status Registers region + - description: RX Channel Realtime Registers region + - description: Ring Realtime Registers region reg-names: items: @@ -161,14 +164,29 @@ allOf: properties: reg: minItems: 5 + items: + - description: BCDMA Control /Status Registers region + - description: Block Copy Channel Realtime Registers region + - description: RX Channel Realtime Registers region + - description: TX Channel Realtime Registers region + - description: Ring Realtime Registers region + - description: Ring Configuration Registers region + - description: TX Channel Configuration Registers region + - description: RX Channel Configuration Registers region + - description: Block Copy Channel Configuration Registers region reg-names: + minItems: 5 items: - const: gcfg - const: bchanrt - const: rchanrt - const: tchanrt - const: ringrt + - const: ring + - const: tchan + - const: rchan + - const: bchan required: - ti,sci-rm-range-bchan @@ -184,7 +202,11 @@ allOf: ti,sci-rm-range-bchan: false reg: - maxItems: 4 + items: + - description: BCDMA Control /Status Registers region + - description: RX Channel Realtime Registers region + - description: TX Channel Realtime Registers region + - description: Ring Realtime Registers region reg-names: items: @@ -220,8 +242,13 @@ examples: <0x0 0x4c000000 0x0 0x20000>, <0x0 0x4a820000 0x0 0x20000>, <0x0 0x4aa40000 0x0 0x20000>, - <0x0 0x4bc00000 0x0 0x100000>; - reg-names = "gcfg", "bchanrt", "rchanrt", "tchanrt", "ringrt"; + <0x0 0x4bc00000 0x0 0x100000>, + <0x0 0x48600000 0x0 0x8000>, + <0x0 0x484a4000 0x0 0x2000>, + <0x0 0x484c2000 0x0 0x2000>, + <0x0 
0x48420000 0x0 0x2000>; + reg-names = "gcfg", "bchanrt", "rchanrt", "tchanrt", "ringrt", + "ring", "tchan", "rchan", "bchan"; msi-parent = <&inta_main_dmss>; #dma-cells = <3>; diff --git a/Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml b/Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml index a69f62f854d8c3e8d4084c4aa2c6e0f6cccd5819..11e064c029946641c8aacf5e49eb61b41477658d 100644 --- a/Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml +++ b/Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml @@ -45,14 +45,28 @@ properties: The second cell is the ASEL value for the channel reg: - maxItems: 4 + minItems: 4 + items: + - description: Packet DMA Control /Status Registers region + - description: RX Channel Realtime Registers region + - description: TX Channel Realtime Registers region + - description: Ring Realtime Registers region + - description: Ring Configuration Registers region + - description: TX Configuration Registers region + - description: RX Configuration Registers region + - description: RX Flow Configuration Registers region reg-names: + minItems: 4 items: - const: gcfg - const: rchanrt - const: tchanrt - const: ringrt + - const: ring + - const: tchan + - const: rchan + - const: rflow msi-parent: true @@ -136,8 +150,14 @@ examples: reg = <0x0 0x485c0000 0x0 0x100>, <0x0 0x4a800000 0x0 0x20000>, <0x0 0x4aa00000 0x0 0x40000>, - <0x0 0x4b800000 0x0 0x400000>; - reg-names = "gcfg", "rchanrt", "tchanrt", "ringrt"; + <0x0 0x4b800000 0x0 0x400000>, + <0x0 0x485e0000 0x0 0x20000>, + <0x0 0x484a0000 0x0 0x4000>, + <0x0 0x484c0000 0x0 0x2000>, + <0x0 0x48430000 0x0 0x4000>; + reg-names = "gcfg", "rchanrt", "tchanrt", "ringrt", + "ring", "tchan", "rchan", "rflow"; + msi-parent = <&inta_main_dmss>; #dma-cells = <2>; diff --git a/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml b/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml index 22f6c5e2f7f4b94fe92c477e8a4336b111896e50..b18cf2bfdb5b14789b0a9ff405d57cb717802ad3 100644 --- 
a/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml +++ b/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml @@ -69,13 +69,24 @@ properties: - ti,j721e-navss-mcu-udmap reg: - maxItems: 3 + minItems: 3 + items: + - description: UDMA-P Control /Status Registers region + - description: RX Channel Realtime Registers region + - description: TX Channel Realtime Registers region + - description: TX Configuration Registers region + - description: RX Configuration Registers region + - description: RX Flow Configuration Registers region reg-names: + minItems: 3 items: - const: gcfg - const: rchanrt - const: tchanrt + - const: tchan + - const: rchan + - const: rflow msi-parent: true @@ -158,8 +169,11 @@ examples: compatible = "ti,am654-navss-main-udmap"; reg = <0x0 0x31150000 0x0 0x100>, <0x0 0x34000000 0x0 0x100000>, - <0x0 0x35000000 0x0 0x100000>; - reg-names = "gcfg", "rchanrt", "tchanrt"; + <0x0 0x35000000 0x0 0x100000>, + <0x0 0x30b00000 0x0 0x20000>, + <0x0 0x30c00000 0x0 0x8000>, + <0x0 0x30d00000 0x0 0x4000>; + reg-names = "gcfg", "rchanrt", "tchanrt", "tchan", "rchan", "rflow"; #dma-cells = <1>; ti,ringacc = <&ringacc>; diff --git a/Documentation/devicetree/bindings/eeprom/at24.yaml b/Documentation/devicetree/bindings/eeprom/at24.yaml index b6864d0ee81e4bbf89fa75d9ee25771c234ddab6..1812ef31d5f1e941d4ae0e5a53e06f278cd55aca 100644 --- a/Documentation/devicetree/bindings/eeprom/at24.yaml +++ b/Documentation/devicetree/bindings/eeprom/at24.yaml @@ -123,6 +123,7 @@ properties: - enum: - onnn,cat24c04 - onnn,cat24c05 + - rohm,br24g04 - const: atmel,24c04 - items: - const: renesas,r1ex24016 diff --git a/Documentation/devicetree/bindings/gpio/xlnx,gpio-xilinx.yaml b/Documentation/devicetree/bindings/gpio/xlnx,gpio-xilinx.yaml index c1060e5fcef3a95c4bf2ef55e897c6c09b790d03..d3d8a2e143ed25dee5634ae9539c413c4f51f865 100644 --- a/Documentation/devicetree/bindings/gpio/xlnx,gpio-xilinx.yaml +++ b/Documentation/devicetree/bindings/gpio/xlnx,gpio-xilinx.yaml @@ -126,7 
+126,7 @@ examples: - | #include - gpio@e000a000 { + gpio@a0020000 { compatible = "xlnx,xps-gpio-1.00.a"; reg = <0xa0020000 0x10000>; #gpio-cells = <2>; diff --git a/Documentation/devicetree/bindings/i2c/st,stm32-i2c.yaml b/Documentation/devicetree/bindings/i2c/st,stm32-i2c.yaml index 94b75d9f66cdb7b1e2227ae5dde9475449fed07d..1b31b87c1800a00d8935d261432117ea5d601191 100644 --- a/Documentation/devicetree/bindings/i2c/st,stm32-i2c.yaml +++ b/Documentation/devicetree/bindings/i2c/st,stm32-i2c.yaml @@ -19,6 +19,7 @@ allOf: - st,stm32f7-i2c - st,stm32mp13-i2c - st,stm32mp15-i2c + - st,stm32mp25-i2c then: properties: i2c-scl-rising-time-ns: @@ -41,6 +42,30 @@ allOf: clock-frequency: enum: [100000, 400000] + - if: + properties: + compatible: + contains: + enum: + - st,stm32f4-i2c + - st,stm32f7-i2c + - st,stm32mp13-i2c + - st,stm32mp15-i2c + then: + properties: + interrupts: + minItems: 2 + + interrupt-names: + minItems: 2 + else: + properties: + interrupts: + maxItems: 1 + + interrupt-names: + maxItems: 1 + properties: compatible: enum: @@ -48,6 +73,7 @@ properties: - st,stm32f7-i2c - st,stm32mp13-i2c - st,stm32mp15-i2c + - st,stm32mp25-i2c reg: maxItems: 1 @@ -56,11 +82,13 @@ properties: items: - description: interrupt ID for I2C event - description: interrupt ID for I2C error + minItems: 1 interrupt-names: items: - const: event - const: error + minItems: 1 resets: maxItems: 1 diff --git a/Documentation/devicetree/bindings/input/adafruit,seesaw-gamepad.yaml b/Documentation/devicetree/bindings/input/adafruit,seesaw-gamepad.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5e86f6de69784c410dd4ad8798e9fd8b7afe6123 --- /dev/null +++ b/Documentation/devicetree/bindings/input/adafruit,seesaw-gamepad.yaml @@ -0,0 +1,63 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/input/adafruit,seesaw-gamepad.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Adafruit Mini I2C 
Gamepad with seesaw + +maintainers: + - Anshul Dalal + +description: | + Adafruit Mini I2C Gamepad + + +-----------------------------+ + | ___ | + | / \ (X) | + | | S | __ __ (Y) (A) | + | \___/ |ST| |SE| (B) | + | | + +-----------------------------+ + + S -> 10-bit precision bidirectional analog joystick + ST -> Start + SE -> Select + X, A, B, Y -> Digital action buttons + + Datasheet: https://cdn-learn.adafruit.com/downloads/pdf/gamepad-qt.pdf + Product page: https://www.adafruit.com/product/5743 + Arduino Driver: https://github.com/adafruit/Adafruit_Seesaw + +properties: + compatible: + const: adafruit,seesaw-gamepad + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + description: + The gamepad's IRQ pin triggers a rising edge if interrupts are enabled. + +required: + - compatible + - reg + +additionalProperties: false + +examples: + - | + #include <dt-bindings/interrupt-controller/irq.h> + + i2c { + #address-cells = <1>; + #size-cells = <0>; + + joystick@50 { + compatible = "adafruit,seesaw-gamepad"; + interrupts = <18 IRQ_TYPE_EDGE_RISING>; + reg = <0x50>; + }; + }; diff --git a/Documentation/devicetree/bindings/input/gpio-keys.yaml b/Documentation/devicetree/bindings/input/gpio-keys.yaml index 159cd9d9fe573c7315cc5066409dae6c8c478e94..cc78c2152921308fe0cad3e29ca78a5fad08f066 100644 --- a/Documentation/devicetree/bindings/input/gpio-keys.yaml +++ b/Documentation/devicetree/bindings/input/gpio-keys.yaml @@ -31,7 +31,23 @@ patternProperties: maxItems: 1 interrupts: - maxItems: 1 + oneOf: + - items: + - description: Optional key interrupt or wakeup interrupt + - items: + - description: Key interrupt + - description: Wakeup interrupt + + interrupt-names: + description: + Optional interrupt names, can be used to specify a separate dedicated + wake-up interrupt in addition to the gpio irq + oneOf: + - items: + - enum: [ irq, wakeup ] + - items: + - const: irq + - const: wakeup label: description: Descriptive name of the key.
@@ -97,6 +113,20 @@ patternProperties: - required: - gpios + allOf: + - if: + properties: + interrupts: + minItems: 2 + required: + - interrupts + then: + properties: + interrupt-names: + minItems: 2 + required: + - interrupt-names + dependencies: wakeup-event-action: [ wakeup-source ] linux,input-value: [ gpios ] @@ -137,6 +167,15 @@ examples: linux,code = <108>; interrupts = <1 IRQ_TYPE_EDGE_FALLING>; }; + + key-wakeup { + label = "GPIO Key WAKEUP"; + linux,code = <143>; + interrupts-extended = <&intc 2 IRQ_TYPE_EDGE_FALLING>, + <&intc_wakeup 0 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "irq", "wakeup"; + wakeup-source; + }; }; ... diff --git a/Documentation/devicetree/bindings/input/gpio-mouse.txt b/Documentation/devicetree/bindings/input/gpio-mouse.txt deleted file mode 100644 index 519510a11af9814b2d95c008c83f42cfc1c361d6..0000000000000000000000000000000000000000 --- a/Documentation/devicetree/bindings/input/gpio-mouse.txt +++ /dev/null @@ -1,32 +0,0 @@ -Device-Tree bindings for GPIO attached mice - -This simply uses standard GPIO handles to define a simple mouse connected -to 5-7 GPIO lines. 
- -Required properties: - - compatible: must be "gpio-mouse" - - scan-interval-ms: The scanning interval in milliseconds - - up-gpios: GPIO line phandle to the line indicating "up" - - down-gpios: GPIO line phandle to the line indicating "down" - - left-gpios: GPIO line phandle to the line indicating "left" - - right-gpios: GPIO line phandle to the line indicating "right" - -Optional properties: - - button-left-gpios: GPIO line handle to the left mouse button - - button-middle-gpios: GPIO line handle to the middle mouse button - - button-right-gpios: GPIO line handle to the right mouse button -Example: - -#include - -gpio-mouse { - compatible = "gpio-mouse"; - scan-interval-ms = <50>; - up-gpios = <&gpio0 0 GPIO_ACTIVE_LOW>; - down-gpios = <&gpio0 1 GPIO_ACTIVE_LOW>; - left-gpios = <&gpio0 2 GPIO_ACTIVE_LOW>; - right-gpios = <&gpio0 3 GPIO_ACTIVE_LOW>; - button-left-gpios = <&gpio0 4 GPIO_ACTIVE_LOW>; - button-middle-gpios = <&gpio0 5 GPIO_ACTIVE_LOW>; - button-right-gpios = <&gpio0 6 GPIO_ACTIVE_LOW>; -}; diff --git a/Documentation/devicetree/bindings/input/gpio-mouse.yaml b/Documentation/devicetree/bindings/input/gpio-mouse.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3928ec6aff1dcd9f990ef61e3814fa30e776d9e6 --- /dev/null +++ b/Documentation/devicetree/bindings/input/gpio-mouse.yaml @@ -0,0 +1,68 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/input/gpio-mouse.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: GPIO attached mouse + +description: | + This simply uses standard GPIO handles to define a simple mouse connected + to 5-7 GPIO lines. 
+ +maintainers: + - Anshul Dalal + +properties: + compatible: + const: gpio-mouse + + scan-interval-ms: + maxItems: 1 + + up-gpios: + maxItems: 1 + + down-gpios: + maxItems: 1 + + left-gpios: + maxItems: 1 + + right-gpios: + maxItems: 1 + + button-left-gpios: + maxItems: 1 + + button-middle-gpios: + maxItems: 1 + + button-right-gpios: + maxItems: 1 + +required: + - compatible + - scan-interval-ms + - up-gpios + - down-gpios + - left-gpios + - right-gpios + +additionalProperties: false + +examples: + - | + #include + + gpio-mouse { + compatible = "gpio-mouse"; + scan-interval-ms = <50>; + up-gpios = <&gpio0 0 GPIO_ACTIVE_LOW>; + down-gpios = <&gpio0 1 GPIO_ACTIVE_LOW>; + left-gpios = <&gpio0 2 GPIO_ACTIVE_LOW>; + right-gpios = <&gpio0 3 GPIO_ACTIVE_LOW>; + button-left-gpios = <&gpio0 4 GPIO_ACTIVE_LOW>; + button-middle-gpios = <&gpio0 5 GPIO_ACTIVE_LOW>; + button-right-gpios = <&gpio0 6 GPIO_ACTIVE_LOW>; + }; diff --git a/Documentation/devicetree/bindings/input/iqs269a.yaml b/Documentation/devicetree/bindings/input/iqs269a.yaml index 3c430d38594f111126268f54774aaa4469fe1754..2c3f693b8982c4947da6f2ce6d76dd2b28add81e 100644 --- a/Documentation/devicetree/bindings/input/iqs269a.yaml +++ b/Documentation/devicetree/bindings/input/iqs269a.yaml @@ -9,6 +9,9 @@ title: Azoteq IQS269A Capacitive Touch Controller maintainers: - Jeff LaBundy +allOf: + - $ref: input.yaml# + description: | The Azoteq IQS269A is an 8-channel capacitive touch controller that features additional Hall-effect and inductive sensing capabilities. @@ -17,7 +20,10 @@ description: | properties: compatible: - const: azoteq,iqs269a + enum: + - azoteq,iqs269a + - azoteq,iqs269a-00 + - azoteq,iqs269a-d0 reg: maxItems: 1 @@ -204,6 +210,73 @@ properties: default: 1 description: Specifies the slider coordinate filter strength. 
+ azoteq,touch-hold-ms: + multipleOf: 256 + minimum: 256 + maximum: 65280 + default: 5120 + description: + Specifies the length of time (in ms) for which the channel selected by + 'azoteq,gpio3-select' must be held in a state of touch in order for an + approximately 60-ms pulse to be asserted on the GPIO4 pin. + + linux,keycodes: + minItems: 1 + maxItems: 8 + description: | + Specifies the numeric keycodes associated with each available gesture in + the following order (enter 0 for unused gestures): + 0: Slider 0 tap + 1: Slider 0 hold + 2: Slider 0 positive flick or swipe + 3: Slider 0 negative flick or swipe + 4: Slider 1 tap + 5: Slider 1 hold + 6: Slider 1 positive flick or swipe + 7: Slider 1 negative flick or swipe + + azoteq,gesture-swipe: + type: boolean + description: + Directs the device to interpret axial gestures as a swipe (finger remains + on slider) instead of a flick (finger leaves slider). + + azoteq,timeout-tap-ms: + multipleOf: 16 + minimum: 0 + maximum: 4080 + default: 400 + description: + Specifies the length of time (in ms) within which a slider touch must be + released in order to be interpreted as a tap. Default and maximum values + as well as step size are reduced by a factor of 4 with device version 2. + + azoteq,timeout-swipe-ms: + multipleOf: 16 + minimum: 0 + maximum: 4080 + default: 2000 + description: + Specifies the length of time (in ms) within which an axial gesture must be + completed in order to be interpreted as a flick or swipe. Default and max- + imum values as well as step size are reduced by a factor of 4 with device + version 2. + + azoteq,thresh-swipe: + $ref: /schemas/types.yaml#/definitions/uint32 + minimum: 0 + maximum: 255 + default: 128 + description: + Specifies the number of points across which an axial gesture must travel + in order to be interpreted as a flick or swipe. 
+ +dependencies: + azoteq,gesture-swipe: ["linux,keycodes"] + azoteq,timeout-tap-ms: ["linux,keycodes"] + azoteq,timeout-swipe-ms: ["linux,keycodes"] + azoteq,thresh-swipe: ["linux,keycodes"] + patternProperties: "^channel@[0-7]$": type: object @@ -454,6 +527,21 @@ patternProperties: additionalProperties: false +if: + properties: + compatible: + contains: + enum: + - azoteq,iqs269a-d0 +then: + patternProperties: + "^channel@[0-7]$": + properties: + azoteq,slider1-select: false +else: + properties: + azoteq,touch-hold-ms: false + required: - compatible - reg @@ -484,6 +572,14 @@ examples: azoteq,hall-enable; azoteq,suspend-mode = <2>; + linux,keycodes = , + , + , + ; + + azoteq,timeout-tap-ms = <400>; + azoteq,timeout-swipe-ms = <800>; + channel@0 { reg = <0x0>; diff --git a/Documentation/devicetree/bindings/input/mediatek,pmic-keys.yaml b/Documentation/devicetree/bindings/input/mediatek,pmic-keys.yaml index e34c9e78d38d8c0d65043ec24438681fe59119ef..70567d92c746ef8bc54eca9652b4f69fb579e74c 100644 --- a/Documentation/devicetree/bindings/input/mediatek,pmic-keys.yaml +++ b/Documentation/devicetree/bindings/input/mediatek,pmic-keys.yaml @@ -90,26 +90,4 @@ required: unevaluatedProperties: false -examples: - - | - #include - #include - - pmic { - compatible = "mediatek,mt6397"; - - keys { - compatible = "mediatek,mt6397-keys"; - mediatek,long-press-mode = <1>; - power-off-time-sec = <0>; - - key-power { - linux,keycodes = ; - wakeup-source; - }; - - key-home { - linux,keycodes = ; - }; - }; - }; +... diff --git a/Documentation/devicetree/bindings/input/microchip,cap11xx.yaml b/Documentation/devicetree/bindings/input/microchip,cap11xx.yaml index 5b5d4f7d34827a12550df3d1480474b69caece2e..7ade03f1b32b8108f053523ce2a170fb55a54b3b 100644 --- a/Documentation/devicetree/bindings/input/microchip,cap11xx.yaml +++ b/Documentation/devicetree/bindings/input/microchip,cap11xx.yaml @@ -45,13 +45,13 @@ properties: Enables the Linux input system's autorepeat feature on the input device. 
linux,keycodes: - minItems: 6 - maxItems: 6 + minItems: 3 + maxItems: 8 description: | Specifies an array of numeric keycode values to be used for the channels. If this property is omitted, KEY_A, KEY_B, etc are used as defaults. - The array must have exactly six entries. + The number of entries must correspond to the number of channels. microchip,sensor-gain: $ref: /schemas/types.yaml#/definitions/uint32 @@ -70,6 +70,59 @@ properties: open drain. This property allows using the active high push-pull output. + microchip,sensitivity-delta-sense: + $ref: /schemas/types.yaml#/definitions/uint32 + default: 32 + enum: [1, 2, 4, 8, 16, 32, 64, 128] + description: + Controls the sensitivity multiplier of a touch detection. + Higher value means more sensitive settings. + At the more sensitive settings, touches are detected for a smaller delta + capacitance corresponding to a "lighter" touch. + + microchip,signal-guard: + $ref: /schemas/types.yaml#/definitions/uint32-array + minItems: 3 + maxItems: 8 + items: + enum: [0, 1] + description: | + 0 - off + 1 - on + The signal guard isolates the signal from virtual grounds. + If enabled then the behavior of the channel is changed to signal guard. + The number of entries must correspond to the number of channels. + + microchip,input-threshold: + $ref: /schemas/types.yaml#/definitions/uint32-array + minItems: 3 + maxItems: 8 + items: + minimum: 0 + maximum: 127 + description: + Specifies the delta threshold that is used to determine if a touch has + been detected. A higher value means a larger difference in capacitance + is required for a touch to be registered, making the touch sensor less + sensitive. + The number of entries must correspond to the number of channels. 
+ + microchip,calib-sensitivity: + $ref: /schemas/types.yaml#/definitions/uint32-array + minItems: 3 + maxItems: 8 + items: + enum: [1, 2, 4] + description: | + Specifies an array of numeric values that controls the gain + used by the calibration routine to enable sensor inputs + to be more sensitive for proximity detection. + Gain is based on touch pad capacitance range + 1 - 5-50pF + 2 - 0-25pF + 4 - 0-12.5pF + The number of entries must correspond to the number of channels. + patternProperties: "^led@[0-7]$": type: object @@ -99,10 +152,29 @@ allOf: contains: enum: - microchip,cap1106 + - microchip,cap1203 + - microchip,cap1206 + - microchip,cap1293 + - microchip,cap1298 then: patternProperties: "^led@[0-7]$": false + - if: + properties: + compatible: + contains: + enum: + - microchip,cap1106 + - microchip,cap1126 + - microchip,cap1188 + - microchip,cap1203 + - microchip,cap1206 + then: + properties: + microchip,signal-guard: false + microchip,calib-sensitivity: false + required: - compatible - interrupts @@ -122,6 +194,8 @@ examples: reg = <0x28>; autorepeat; microchip,sensor-gain = <2>; + microchip,sensitivity-delta-sense = <16>; + microchip,input-threshold = <21>, <18>, <46>, <46>, <46>, <21>; linux,keycodes = <103>, /* KEY_UP */ <106>, /* KEY_RIGHT */ diff --git a/Documentation/devicetree/bindings/input/sprd,sc27xx-vibrator.yaml b/Documentation/devicetree/bindings/input/sprd,sc27xx-vibrator.yaml index a401a0bfcbec21e098e4fe7bf9b85410d6ea83a8..4c8d303ff93c949f63926e544b2023e6bb8d492f 100644 --- a/Documentation/devicetree/bindings/input/sprd,sc27xx-vibrator.yaml +++ b/Documentation/devicetree/bindings/input/sprd,sc27xx-vibrator.yaml @@ -28,21 +28,4 @@ required: additionalProperties: false -examples: - - | - #include - sc2731_pmic: pmic@0 { - compatible = "sprd,sc2731"; - reg = <0 0>; - spi-max-frequency = <26000000>; - interrupts = ; - interrupt-controller; - #interrupt-cells = <2>; - #address-cells = <1>; - #size-cells = <0>; - - vibrator@eb4 { - compatible = 
"sprd,sc2731-vibrator"; - reg = <0xeb4>; - }; - }; +... diff --git a/Documentation/devicetree/bindings/input/ti,drv2665.txt b/Documentation/devicetree/bindings/input/ti,drv2665.txt deleted file mode 100644 index 1ba97ac04305873fab152755d02e48a7c4bd9542..0000000000000000000000000000000000000000 --- a/Documentation/devicetree/bindings/input/ti,drv2665.txt +++ /dev/null @@ -1,17 +0,0 @@ -* Texas Instruments - drv2665 Haptics driver - -Required properties: - - compatible - "ti,drv2665" - DRV2665 - - reg - I2C slave address - - vbat-supply - Required supply regulator - -Example: - -haptics: haptics@59 { - compatible = "ti,drv2665"; - reg = <0x59>; - vbat-supply = <&vbat>; -}; - -For more product information please see the link below: -http://www.ti.com/product/drv2665 diff --git a/Documentation/devicetree/bindings/input/ti,drv2667.txt b/Documentation/devicetree/bindings/input/ti,drv2667.txt deleted file mode 100644 index 996382cf994a1203073bd70fe9b104977642ea77..0000000000000000000000000000000000000000 --- a/Documentation/devicetree/bindings/input/ti,drv2667.txt +++ /dev/null @@ -1,17 +0,0 @@ -* Texas Instruments - drv2667 Haptics driver - -Required properties: - - compatible - "ti,drv2667" - DRV2667 - - reg - I2C slave address - - vbat-supply - Required supply regulator - -Example: - -haptics: haptics@59 { - compatible = "ti,drv2667"; - reg = <0x59>; - vbat-supply = <&vbat>; -}; - -For more product information please see the link below: -http://www.ti.com/product/drv2667 diff --git a/Documentation/devicetree/bindings/input/ti,drv266x.yaml b/Documentation/devicetree/bindings/input/ti,drv266x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..da1818824373858c39bd35ae795dd93f9ad45991 --- /dev/null +++ b/Documentation/devicetree/bindings/input/ti,drv266x.yaml @@ -0,0 +1,50 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/input/ti,drv266x.yaml# +$schema: 
http://devicetree.org/meta-schemas/core.yaml# + +title: Texas Instruments - drv266x Haptics driver + +description: | + Product Page: + http://www.ti.com/product/drv2665 + http://www.ti.com/product/drv2667 + +maintainers: + - Anshul Dalal + +properties: + compatible: + enum: + - ti,drv2665 + - ti,drv2667 + + reg: + maxItems: 1 + + vbat-supply: + description: Required supply regulator + +required: + - compatible + - reg + - vbat-supply + +additionalProperties: false + +examples: + - | + #include + + + i2c { + #address-cells = <1>; + #size-cells = <0>; + + haptics@59 { + compatible = "ti,drv2667"; + reg = <0x59>; + vbat-supply = <&vbat>; + }; + }; diff --git a/Documentation/devicetree/bindings/input/touchscreen/neonode,zforce.yaml b/Documentation/devicetree/bindings/input/touchscreen/neonode,zforce.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c2ee89b76ea138a045f607edb83cf6b7b08fa905 --- /dev/null +++ b/Documentation/devicetree/bindings/input/touchscreen/neonode,zforce.yaml @@ -0,0 +1,72 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/input/touchscreen/neonode,zforce.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Neonode infrared touchscreen controller + +maintainers: + - Heiko Stuebner + +allOf: + - $ref: touchscreen.yaml# + +properties: + compatible: + const: neonode,zforce + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + reset-gpios: + maxItems: 1 + + irq-gpios: + maxItems: 1 + + x-size: + deprecated: true + $ref: /schemas/types.yaml#/definitions/uint32 + + y-size: + deprecated: true + $ref: /schemas/types.yaml#/definitions/uint32 + + vdd-supply: true + +required: + - compatible + - reg + - interrupts + - reset-gpios + +unevaluatedProperties: false + +examples: + - | + #include + + i2c { + #address-cells = <1>; + #size-cells = <0>; + + touchscreen@50 { + compatible = "neonode,zforce"; + reg = <0x50>; + interrupts = <2 0>; + vdd-supply = 
<&reg_zforce_vdd>;
*/ - - touchscreen@48 { - compatible = "samsung,s6sy761"; - reg = <0x48>; - interrupt-parent = <&gpa1>; - interrupts = <1 IRQ_TYPE_NONE>; - avdd-supply = <&ldo30_reg>; - vdd-supply = <&ldo31_reg>; - touchscreen-size-x = <4096>; - touchscreen-size-y = <4096>; - }; -}; diff --git a/Documentation/devicetree/bindings/input/touchscreen/samsung,s6sy761.yaml b/Documentation/devicetree/bindings/input/touchscreen/samsung,s6sy761.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1ffd17af3c53f1e2d6afdf859c8239e20fe041e5 --- /dev/null +++ b/Documentation/devicetree/bindings/input/touchscreen/samsung,s6sy761.yaml @@ -0,0 +1,54 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/input/touchscreen/samsung,s6sy761.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Samsung S6SY761 touchscreen controller + +maintainers: + - Andi Shyti + +allOf: + - $ref: touchscreen.yaml# + +properties: + compatible: + const: samsung,s6sy761 + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + avdd-supply: true + vdd-supply: true + +unevaluatedProperties: false + +required: + - compatible + - reg + - interrupts + - avdd-supply + - vdd-supply + +examples: + - | + #include + i2c { + #address-cells = <1>; + #size-cells = <0>; + + touchscreen@48 { + compatible = "samsung,s6sy761"; + reg = <0x48>; + interrupt-parent = <&gpa1>; + interrupts = <1 IRQ_TYPE_LEVEL_HIGH>; + avdd-supply = <&ldo30_reg>; + vdd-supply = <&ldo31_reg>; + touchscreen-size-x = <4096>; + touchscreen-size-y = <4096>; + }; + }; diff --git a/Documentation/devicetree/bindings/input/touchscreen/zforce_ts.txt b/Documentation/devicetree/bindings/input/touchscreen/zforce_ts.txt deleted file mode 100644 index e3c27c4fd9c851fbbb51791084b280a9e343827d..0000000000000000000000000000000000000000 --- a/Documentation/devicetree/bindings/input/touchscreen/zforce_ts.txt +++ /dev/null @@ -1,34 +0,0 @@ -* Neonode infrared touchscreen controller - 
-Required properties: -- compatible: must be "neonode,zforce" -- reg: I2C address of the chip -- interrupts: interrupt to which the chip is connected -- reset-gpios: reset gpio the chip is connected to -- x-size: horizontal resolution of touchscreen -- y-size: vertical resolution of touchscreen - -Optional properties: -- irq-gpios : interrupt gpio the chip is connected to -- vdd-supply: Regulator controlling the controller supply - -Example: - - i2c@00000000 { - /* ... */ - - zforce_ts@50 { - compatible = "neonode,zforce"; - reg = <0x50>; - interrupts = <2 0>; - vdd-supply = <®_zforce_vdd>; - - reset-gpios = <&gpio5 9 0>; /* RST */ - irq-gpios = <&gpio5 6 0>; /* IRQ, optional */ - - x-size = <800>; - y-size = <600>; - }; - - /* ... */ - }; diff --git a/Documentation/devicetree/bindings/interrupt-controller/loongson,liointc.yaml b/Documentation/devicetree/bindings/interrupt-controller/loongson,liointc.yaml index 00b570c82903974cbaeaba09406ff0581c61d8e0..60441f0c5d7211f24b5fa536425b351767040a2d 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/loongson,liointc.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/loongson,liointc.yaml @@ -11,8 +11,13 @@ maintainers: description: | This interrupt controller is found in the Loongson-3 family of chips and - Loongson-2K1000 chip, as the primary package interrupt controller which + Loongson-2K series chips, as the primary package interrupt controller which can route local I/O interrupt to interrupt lines of cores. + Be aware of the following points. + 1.The Loongson-2K0500 is a single core CPU; + 2.The Loongson-2K0500/2K1000 has 64 device interrupt sources as inputs, so we + need to define two nodes in dts{i} to describe the "0-31" and "32-61" interrupt + sources respectively. 
allOf: - $ref: /schemas/interrupt-controller.yaml# @@ -33,6 +38,7 @@ properties: - const: main - const: isr0 - const: isr1 + minItems: 2 interrupt-controller: true @@ -45,11 +51,9 @@ properties: interrupt-names: description: List of names for the parent interrupts. items: - - const: int0 - - const: int1 - - const: int2 - - const: int3 + pattern: int[0-3] minItems: 1 + maxItems: 4 '#interrupt-cells': const: 2 @@ -69,6 +73,7 @@ required: - compatible - reg - interrupts + - interrupt-names - interrupt-controller - '#interrupt-cells' - loongson,parent_int_map @@ -86,7 +91,8 @@ if: then: properties: reg: - minItems: 3 + minItems: 2 + maxItems: 3 required: - reg-names diff --git a/Documentation/devicetree/bindings/iommu/apple,dart.yaml b/Documentation/devicetree/bindings/iommu/apple,dart.yaml index 903edf85d72e4057d76a00e22b9ee824a4089c0c..7adb1de455a5b38dfb6c76303f11c8f95395e0dd 100644 --- a/Documentation/devicetree/bindings/iommu/apple,dart.yaml +++ b/Documentation/devicetree/bindings/iommu/apple,dart.yaml @@ -24,6 +24,7 @@ properties: compatible: enum: - apple,t8103-dart + - apple,t8103-usb4-dart - apple,t8110-dart - apple,t6000-dart diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index aa9e1c0895a508a2e6eed5831a73e92de06df9d4..a4042ae2477024b0230d7db843c74f6b1da7d732 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -56,6 +56,8 @@ properties: - qcom,sm8350-smmu-500 - qcom,sm8450-smmu-500 - qcom,sm8550-smmu-500 + - qcom,sm8650-smmu-500 + - qcom,x1e80100-smmu-500 - const: qcom,smmu-500 - const: arm,mmu-500 @@ -89,6 +91,8 @@ properties: - qcom,sm8150-smmu-500 - qcom,sm8250-smmu-500 - qcom,sm8350-smmu-500 + - qcom,sm8450-smmu-500 + - qcom,sm8550-smmu-500 - const: qcom,adreno-smmu - const: qcom,smmu-500 - const: arm,mmu-500 @@ -429,6 +433,30 @@ allOf: - description: interface clock required to access smmu's registers 
through the TCU's programming interface. + - if: + properties: + compatible: + items: + - enum: + - qcom,sm8350-smmu-500 + - const: qcom,adreno-smmu + - const: qcom,smmu-500 + - const: arm,mmu-500 + then: + properties: + clock-names: + items: + - const: bus + - const: iface + - const: ahb + - const: hlos1_vote_gpu_smmu + - const: cx_gmu + - const: hub_cx_int + - const: hub_aon + clocks: + minItems: 7 + maxItems: 7 + - if: properties: compatible: @@ -453,6 +481,50 @@ allOf: - description: Voter clock required for HLOS SMMU access - description: Interface clock required for register access + - if: + properties: + compatible: + const: qcom,sm8450-smmu-500 + then: + properties: + clock-names: + items: + - const: gmu + - const: hub + - const: hlos + - const: bus + - const: iface + - const: ahb + + clocks: + items: + - description: GMU clock + - description: GPU HUB clock + - description: HLOS vote clock + - description: GPU memory bus clock + - description: GPU SNoC bus clock + - description: GPU AHB clock + + - if: + properties: + compatible: + const: qcom,sm8550-smmu-500 + then: + properties: + clock-names: + items: + - const: hlos + - const: bus + - const: iface + - const: ahb + + clocks: + items: + - description: HLOS vote clock + - description: GPU memory bus clock + - description: GPU SNoC bus clock + - description: GPU AHB clock + # Disallow clocks for all other platforms with specific compatibles - if: properties: @@ -472,9 +544,8 @@ allOf: - qcom,sdx65-smmu-500 - qcom,sm6350-smmu-500 - qcom,sm6375-smmu-500 - - qcom,sm8350-smmu-500 - - qcom,sm8450-smmu-500 - - qcom,sm8550-smmu-500 + - qcom,sm8650-smmu-500 + - qcom,x1e80100-smmu-500 then: properties: clock-names: false diff --git a/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml b/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml index ba9124f721f1514759d5a63c2036283a04e1d0aa..621dde0e45d8514cfcfe5a0cdd17f87a08ada5bc 100644 --- a/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml 
+++ b/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml @@ -19,9 +19,14 @@ description: |+ properties: compatible: - enum: - - rockchip,iommu - - rockchip,rk3568-iommu + oneOf: + - enum: + - rockchip,iommu + - rockchip,rk3568-iommu + - items: + - enum: + - rockchip,rk3588-iommu + - const: rockchip,rk3568-iommu reg: items: diff --git a/Documentation/devicetree/bindings/leds/backlight/mps,mp3309c.yaml b/Documentation/devicetree/bindings/leds/backlight/mps,mp3309c.yaml index 4191e33626f51ad3ccb5843e61caeb83eafea193..527a37368ed7422ae605d693e95b2765fa15246d 100644 --- a/Documentation/devicetree/bindings/leds/backlight/mps,mp3309c.yaml +++ b/Documentation/devicetree/bindings/leds/backlight/mps,mp3309c.yaml @@ -14,8 +14,8 @@ description: | programmable switching frequency to optimize efficiency. It supports two different dimming modes: - - analog mode, via I2C commands (default) - - PWM controlled mode. + - analog mode, via I2C commands, as default mode (32 dimming levels) + - PWM controlled mode (optional) The datasheet is available at: https://www.monolithicpower.com/en/mp3309c.html @@ -50,8 +50,6 @@ properties: required: - compatible - reg - - max-brightness - - default-brightness unevaluatedProperties: false @@ -66,8 +64,8 @@ examples: compatible = "mps,mp3309c"; reg = <0x17>; pwms = <&pwm1 0 3333333 0>; /* 300 Hz --> (1/f) * 1*10^9 */ - max-brightness = <100>; - default-brightness = <80>; + brightness-levels = <0 4 8 16 32 64 128 255>; + default-brightness = <6>; mps,overvoltage-protection-microvolt = <24000000>; }; }; diff --git a/Documentation/devicetree/bindings/loongarch/cpus.yaml b/Documentation/devicetree/bindings/loongarch/cpus.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f175872995e117b18b3cc141d7044d1a390e0f06 --- /dev/null +++ b/Documentation/devicetree/bindings/loongarch/cpus.yaml @@ -0,0 +1,61 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: 
http://devicetree.org/schemas/loongarch/cpus.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: LoongArch CPUs + +maintainers: + - Binbin Zhou + +description: + This document describes the list of LoongArch CPU cores that support FDT, + it describe the layout of CPUs in a system through the "cpus" node. + +allOf: + - $ref: /schemas/cpu.yaml# + +properties: + compatible: + enum: + - loongson,la264 + - loongson,la364 + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + +required: + - compatible + - reg + - clocks + +unevaluatedProperties: false + +examples: + - | + #include + + cpus { + #size-cells = <0>; + #address-cells = <1>; + + cpu@0 { + compatible = "loongson,la264"; + device_type = "cpu"; + reg = <0>; + clocks = <&clk LOONGSON2_NODE_CLK>; + }; + + cpu@1 { + compatible = "loongson,la264"; + device_type = "cpu"; + reg = <1>; + clocks = <&clk LOONGSON2_NODE_CLK>; + }; + }; + +... diff --git a/Documentation/devicetree/bindings/loongarch/loongson.yaml b/Documentation/devicetree/bindings/loongarch/loongson.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e1a4a97b7576c9401e746e6d7a06456282b5287a --- /dev/null +++ b/Documentation/devicetree/bindings/loongarch/loongson.yaml @@ -0,0 +1,34 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/loongarch/loongson.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Loongson SoC-based boards + +maintainers: + - Binbin Zhou + +properties: + $nodename: + const: '/' + compatible: + oneOf: + - description: Loongson-2K0500 processor based boards + items: + - const: loongson,ls2k0500-ref + - const: loongson,ls2k0500 + + - description: Loongson-2K1000 processor based boards + items: + - const: loongson,ls2k1000-ref + - const: loongson,ls2k1000 + + - description: Loongson-2K2000 processor based boards + items: + - const: loongson,ls2k2000-ref + - const: loongson,ls2k2000 + +additionalProperties: true + +... 
diff --git a/Documentation/devicetree/bindings/phy/amlogic,g12a-mipi-dphy-analog.yaml b/Documentation/devicetree/bindings/phy/amlogic,g12a-mipi-dphy-analog.yaml index c8c83acfb871d0f45a871bcac7167ca4fbc91997..81c2654b7e57e98ce89e44c46b6135b7fce49b59 100644 --- a/Documentation/devicetree/bindings/phy/amlogic,g12a-mipi-dphy-analog.yaml +++ b/Documentation/devicetree/bindings/phy/amlogic,g12a-mipi-dphy-analog.yaml @@ -16,20 +16,8 @@ properties: "#phy-cells": const: 0 - reg: - maxItems: 1 - required: - compatible - - reg - "#phy-cells" additionalProperties: false - -examples: - - | - phy@0 { - compatible = "amlogic,g12a-mipi-dphy-analog"; - reg = <0x0 0xc>; - #phy-cells = <0>; - }; diff --git a/Documentation/devicetree/bindings/phy/amlogic,meson-axg-mipi-pcie-analog.yaml b/Documentation/devicetree/bindings/phy/amlogic,meson-axg-mipi-pcie-analog.yaml index 009a3980831856b9db5d99d90c8b8f670dcf5333..70def36e5688d0153b92166e5e35de67378c6698 100644 --- a/Documentation/devicetree/bindings/phy/amlogic,meson-axg-mipi-pcie-analog.yaml +++ b/Documentation/devicetree/bindings/phy/amlogic,meson-axg-mipi-pcie-analog.yaml @@ -9,16 +9,6 @@ title: Amlogic AXG shared MIPI/PCIE analog PHY maintainers: - Remi Pommarel -description: |+ - The Everything-Else Power Domains node should be the child of a syscon - node with the required property: - - - compatible: Should be the following: - "amlogic,meson-gx-hhi-sysctrl", "simple-mfd", "syscon" - - Refer to the bindings described in - Documentation/devicetree/bindings/mfd/syscon.yaml - properties: compatible: const: amlogic,axg-mipi-pcie-analog-phy @@ -31,10 +21,3 @@ required: - "#phy-cells" additionalProperties: false - -examples: - - | - mpphy: phy { - compatible = "amlogic,axg-mipi-pcie-analog-phy"; - #phy-cells = <0>; - }; diff --git a/Documentation/devicetree/bindings/phy/mediatek,dsi-phy.yaml b/Documentation/devicetree/bindings/phy/mediatek,dsi-phy.yaml index 
6703689fcdbe103f791a189e0bb8f8ab095975d9..f6e494d0d89b82dabed0265782dbd9cd83de5737 100644 --- a/Documentation/devicetree/bindings/phy/mediatek,dsi-phy.yaml +++ b/Documentation/devicetree/bindings/phy/mediatek,dsi-phy.yaml @@ -31,6 +31,7 @@ properties: - items: - enum: - mediatek,mt8188-mipi-tx + - mediatek,mt8195-mipi-tx - mediatek,mt8365-mipi-tx - const: mediatek,mt8183-mipi-tx - const: mediatek,mt2701-mipi-tx diff --git a/Documentation/devicetree/bindings/phy/mediatek,tphy.yaml b/Documentation/devicetree/bindings/phy/mediatek,tphy.yaml index 2bb91542e984e788b20db49204cd97880ba6b224..acba0720125ddd3d327df154a3f30c6e2e8608aa 100644 --- a/Documentation/devicetree/bindings/phy/mediatek,tphy.yaml +++ b/Documentation/devicetree/bindings/phy/mediatek,tphy.yaml @@ -235,6 +235,15 @@ patternProperties: Specify the flag to enable BC1.2 if support it type: boolean + mediatek,force-mode: + description: + The force mode is used to manually switch the shared phy mode between + USB3 and PCIe, when USB3 phy type is selected by the consumer, and + force-mode is set, will cause phy's power and pipe toggled and force + phy as USB3 mode which switched from default PCIe mode. But perfer to + use the property "mediatek,syscon-type" for newer SoCs that support it. 
+ type: boolean + mediatek,syscon-type: $ref: /schemas/types.yaml#/definitions/phandle-array maxItems: 1 diff --git a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml index 2c3d6553a7bac692f688adaf9ac5ff568d862e46..6c03f2d5fca3cca6ad0cccc4ae3f8679e4c59026 100644 --- a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml @@ -36,6 +36,8 @@ properties: - qcom,sm8450-qmp-gen4x2-pcie-phy - qcom,sm8550-qmp-gen3x2-pcie-phy - qcom,sm8550-qmp-gen4x2-pcie-phy + - qcom,sm8650-qmp-gen3x2-pcie-phy + - qcom,sm8650-qmp-gen4x2-pcie-phy reg: minItems: 1 @@ -147,6 +149,8 @@ allOf: - qcom,sm8450-qmp-gen3x2-pcie-phy - qcom,sm8550-qmp-gen3x2-pcie-phy - qcom,sm8550-qmp-gen4x2-pcie-phy + - qcom,sm8650-qmp-gen3x2-pcie-phy + - qcom,sm8650-qmp-gen4x2-pcie-phy then: properties: clocks: @@ -189,6 +193,7 @@ allOf: contains: enum: - qcom,sm8550-qmp-gen4x2-pcie-phy + - qcom,sm8650-qmp-gen4x2-pcie-phy then: properties: resets: diff --git a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-ufs-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-ufs-phy.yaml index f3a3296c811cb3403201642703c77060b7ed1c36..8474eef8d0ff5233a075bf5c17ca0abaa14fbd41 100644 --- a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-ufs-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-ufs-phy.yaml @@ -32,6 +32,7 @@ properties: - qcom,sm8350-qmp-ufs-phy - qcom,sm8450-qmp-ufs-phy - qcom,sm8550-qmp-ufs-phy + - qcom,sm8650-qmp-ufs-phy reg: maxItems: 1 @@ -112,6 +113,7 @@ allOf: - qcom,sm8250-qmp-ufs-phy - qcom,sm8350-qmp-ufs-phy - qcom,sm8550-qmp-ufs-phy + - qcom,sm8650-qmp-ufs-phy then: properties: clocks: diff --git a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml index 
57702f7f2a46cf54bcf6f660704144780faee8f7..15d82c67f157b6ceadc366540fca0c200201d920 100644 --- a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml @@ -32,6 +32,7 @@ properties: - qcom,sm8150-qmp-usb3-uni-phy - qcom,sm8250-qmp-usb3-uni-phy - qcom,sm8350-qmp-usb3-uni-phy + - qcom,x1e80100-qmp-usb3-uni-phy reg: @@ -135,6 +136,7 @@ allOf: - qcom,sm8150-qmp-usb3-uni-phy - qcom,sm8250-qmp-usb3-uni-phy - qcom,sm8350-qmp-usb3-uni-phy + - qcom,x1e80100-qmp-usb3-uni-phy then: properties: clocks: @@ -171,6 +173,7 @@ allOf: enum: - qcom,sa8775p-qmp-usb3-uni-phy - qcom,sc8280xp-qmp-usb3-uni-phy + - qcom,x1e80100-qmp-usb3-uni-phy then: required: - power-domains diff --git a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb43dp-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb43dp-phy.yaml index 9af203dc8793f34ee6adf58813c5838fc71200e8..2d0d7e9e643117f5ec625e49270ac94c70603e7e 100644 --- a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb43dp-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb43dp-phy.yaml @@ -27,6 +27,8 @@ properties: - qcom,sm8350-qmp-usb3-dp-phy - qcom,sm8450-qmp-usb3-dp-phy - qcom,sm8550-qmp-usb3-dp-phy + - qcom,sm8650-qmp-usb3-dp-phy + - qcom,x1e80100-qmp-usb3-dp-phy reg: maxItems: 1 @@ -62,12 +64,12 @@ properties: "#clock-cells": const: 1 description: - See include/dt-bindings/dt-bindings/phy/phy-qcom-qmp.h + See include/dt-bindings/phy/phy-qcom-qmp.h "#phy-cells": const: 1 description: - See include/dt-bindings/dt-bindings/phy/phy-qcom-qmp.h + See include/dt-bindings/phy/phy-qcom-qmp.h orientation-switch: description: @@ -128,6 +130,8 @@ allOf: - qcom,sc8280xp-qmp-usb43dp-phy - qcom,sm6350-qmp-usb3-dp-phy - qcom,sm8550-qmp-usb3-dp-phy + - qcom,sm8650-qmp-usb3-dp-phy + - qcom,x1e80100-qmp-usb3-dp-phy then: required: - power-domains diff --git 
a/Documentation/devicetree/bindings/phy/qcom,snps-eusb2-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,snps-eusb2-phy.yaml index c95828607ab6bc6b23217ebe5e380c60f2d224a1..b82f7f5731ed4a6879ffbdc1b970fdfe0557f944 100644 --- a/Documentation/devicetree/bindings/phy/qcom,snps-eusb2-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,snps-eusb2-phy.yaml @@ -18,6 +18,8 @@ properties: - items: - enum: - qcom,sdx75-snps-eusb2-phy + - qcom,sm8650-snps-eusb2-phy + - qcom,x1e80100-snps-eusb2-phy - const: qcom,sm8550-snps-eusb2-phy - const: qcom,sm8550-snps-eusb2-phy diff --git a/Documentation/devicetree/bindings/power/reset/nvmem-reboot-mode.yaml b/Documentation/devicetree/bindings/power/reset/nvmem-reboot-mode.yaml index 14a262bcbf7cd21ebf62ee02f738bf82cbde80e1..627f8a6078c299e32e4bc7597509a6ef52d119d7 100644 --- a/Documentation/devicetree/bindings/power/reset/nvmem-reboot-mode.yaml +++ b/Documentation/devicetree/bindings/power/reset/nvmem-reboot-mode.yaml @@ -28,17 +28,15 @@ properties: items: - const: reboot-mode -patternProperties: - "^mode-.+": - $ref: /schemas/types.yaml#/definitions/uint32 - description: Vendor-specific mode value written to the mode register +allOf: + - $ref: reboot-mode.yaml# required: - compatible - nvmem-cells - nvmem-cell-names -additionalProperties: false +unevaluatedProperties: false examples: - | diff --git a/Documentation/devicetree/bindings/power/reset/qcom,pon.yaml b/Documentation/devicetree/bindings/power/reset/qcom,pon.yaml index 5e460128b0d10911a541c959a9581efcb8b4d393..fc8105a7b9b268df5cb08ad32cde26c50ea955ce 100644 --- a/Documentation/devicetree/bindings/power/reset/qcom,pon.yaml +++ b/Documentation/devicetree/bindings/power/reset/qcom,pon.yaml @@ -111,21 +111,24 @@ examples: #include #include #include - spmi_bus: spmi@c440000 { + + spmi@c440000 { reg = <0x0c440000 0x1100>; #address-cells = <2>; #size-cells = <0>; - pmk8350: pmic@0 { + + pmic@0 { reg = <0x0 SPMI_USID>; #address-cells = <1>; #size-cells = <0>; - pmk8350_pon: 
pon_hlos@1300 { - reg = <0x1300>; + + pon@800 { compatible = "qcom,pm8998-pon"; + reg = <0x800>; pwrkey { compatible = "qcom,pm8941-pwrkey"; - interrupts = < 0x0 0x8 0 IRQ_TYPE_EDGE_BOTH >; + interrupts = <0x0 0x8 0 IRQ_TYPE_EDGE_BOTH>; debounce = <15625>; bias-pull-up; linux,code = ; diff --git a/Documentation/devicetree/bindings/power/reset/syscon-reboot-mode.yaml b/Documentation/devicetree/bindings/power/reset/syscon-reboot-mode.yaml index 9b1ffceefe3dec86250dc235f92545bb123ea3cf..b6acff199cdecea08c1243ed5e8ad71240d65e9a 100644 --- a/Documentation/devicetree/bindings/power/reset/syscon-reboot-mode.yaml +++ b/Documentation/devicetree/bindings/power/reset/syscon-reboot-mode.yaml @@ -29,12 +29,10 @@ properties: $ref: /schemas/types.yaml#/definitions/uint32 description: Offset in the register map for the mode register (in bytes) -patternProperties: - "^mode-.+": - $ref: /schemas/types.yaml#/definitions/uint32 - description: Vendor-specific mode value written to the mode register +allOf: + - $ref: reboot-mode.yaml# -additionalProperties: false +unevaluatedProperties: false required: - compatible diff --git a/Documentation/devicetree/bindings/power/reset/xlnx,zynqmp-power.yaml b/Documentation/devicetree/bindings/power/reset/xlnx,zynqmp-power.yaml index 45792e216981a99de457cc99ea2d8f2dfd130136..799831636194f50ffdb139bd146d8905802bd474 100644 --- a/Documentation/devicetree/bindings/power/reset/xlnx,zynqmp-power.yaml +++ b/Documentation/devicetree/bindings/power/reset/xlnx,zynqmp-power.yaml @@ -57,7 +57,7 @@ examples: firmware { zynqmp-firmware { - zynqmp-power { + power-management { compatible = "xlnx,zynqmp-power"; interrupts = <0 35 4>; }; @@ -70,7 +70,7 @@ examples: firmware { zynqmp-firmware { - zynqmp-power { + power-management { compatible = "xlnx,zynqmp-power"; interrupt-parent = <&gic>; interrupts = <0 35 4>; diff --git a/Documentation/devicetree/bindings/power/supply/bq24190.yaml b/Documentation/devicetree/bindings/power/supply/bq24190.yaml index 
d3ebc9de8c0b49734cb5ce2b138a2d4b67bed7f9..131b7e57d22f46d28a9501c5641fd3c03b7e6ca0 100644 --- a/Documentation/devicetree/bindings/power/supply/bq24190.yaml +++ b/Documentation/devicetree/bindings/power/supply/bq24190.yaml @@ -20,6 +20,7 @@ properties: - ti,bq24192 - ti,bq24192i - ti,bq24196 + - ti,bq24296 reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/riscv/cpus.yaml b/Documentation/devicetree/bindings/riscv/cpus.yaml index 23646b684ea279b1962e555736683c906135f569..9d8670c00e3b3bdea5d2196b98538d97465abf0d 100644 --- a/Documentation/devicetree/bindings/riscv/cpus.yaml +++ b/Documentation/devicetree/bindings/riscv/cpus.yaml @@ -63,8 +63,8 @@ properties: mmu-type: description: - Identifies the MMU address translation mode used on this - hart. These values originate from the RISC-V Privileged + Identifies the largest MMU address translation mode supported by + this hart. These values originate from the RISC-V Privileged Specification document, available from https://riscv.org/specifications/ $ref: /schemas/types.yaml#/definitions/string @@ -80,6 +80,11 @@ properties: description: The blocksize in bytes for the Zicbom cache operations. + riscv,cbop-block-size: + $ref: /schemas/types.yaml#/definitions/uint32 + description: + The blocksize in bytes for the Zicbop cache operations. + riscv,cboz-block-size: $ref: /schemas/types.yaml#/definitions/uint32 description: diff --git a/Documentation/devicetree/bindings/riscv/extensions.yaml b/Documentation/devicetree/bindings/riscv/extensions.yaml index 27beedb9819879180412fbd5bea753ef4a43df34..63d81dc895e5ce4c08715ce1d6bf0958a757ca86 100644 --- a/Documentation/devicetree/bindings/riscv/extensions.yaml +++ b/Documentation/devicetree/bindings/riscv/extensions.yaml @@ -48,7 +48,7 @@ properties: insensitive, letters in the riscv,isa string must be all lowercase. 
$ref: /schemas/types.yaml#/definitions/string - pattern: ^rv(?:64|32)imaf?d?q?c?b?k?j?p?v?h?(?:[hsxz](?:[a-z])+)?(?:_[hsxz](?:[a-z])+)*$ + pattern: ^rv(?:64|32)imaf?d?q?c?b?k?j?p?v?h?(?:[hsxz](?:[0-9a-z])+)?(?:_[hsxz](?:[0-9a-z])+)*$ deprecated: true riscv,isa-base: diff --git a/Documentation/devicetree/bindings/rtc/adi,max31335.yaml b/Documentation/devicetree/bindings/rtc/adi,max31335.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0125cf6727cc3d9eb3e0253299904ee363ec40ca --- /dev/null +++ b/Documentation/devicetree/bindings/rtc/adi,max31335.yaml @@ -0,0 +1,70 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/rtc/adi,max31335.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Analog Devices MAX31335 RTC + +maintainers: + - Antoniu Miclaus + +description: + Analog Devices MAX31335 I2C RTC ±2ppm Automotive Real-Time Clock with + Integrated MEMS Resonator. + +allOf: + - $ref: rtc.yaml# + +properties: + compatible: + const: adi,max31335 + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + "#clock-cells": + description: + RTC can be used as a clock source through its clock output pin. + const: 0 + + adi,tc-diode: + description: + Select the diode configuration for the trickle charger. + schottky - Schottky diode in series. + standard+schottky - standard diode + Schottky diode in series. + enum: [schottky, standard+schottky] + + trickle-resistor-ohms: + description: + Selected resistor for trickle charger. Should be specified if trickle + charger should be enabled. 
+ enum: [3000, 6000, 11000] + +required: + - compatible + - reg + +unevaluatedProperties: false + +examples: + - | + #include + i2c { + #address-cells = <1>; + #size-cells = <0>; + + rtc@68 { + compatible = "adi,max31335"; + reg = <0x68>; + pinctrl-0 = <&rtc_nint_pins>; + interrupts-extended = <&gpio1 16 IRQ_TYPE_LEVEL_HIGH>; + aux-voltage-chargeable = <1>; + trickle-resistor-ohms = <6000>; + adi,tc-diode = "schottky"; + }; + }; +... diff --git a/Documentation/devicetree/bindings/rtc/epson,rx8900.yaml b/Documentation/devicetree/bindings/rtc/epson,rx8900.yaml index 1df7c45d95c18ef90c8e996be5b83bc243099155..b770149c5fd677137bbeee87178d4188e5a0b59b 100644 --- a/Documentation/devicetree/bindings/rtc/epson,rx8900.yaml +++ b/Documentation/devicetree/bindings/rtc/epson,rx8900.yaml @@ -29,6 +29,8 @@ properties: trickle-diode-disable: true + wakeup-source: true + required: - compatible - reg diff --git a/Documentation/devicetree/bindings/rtc/nuvoton,ma35d1-rtc.yaml b/Documentation/devicetree/bindings/rtc/nuvoton,ma35d1-rtc.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5e4ade803eed1edb726b478d4cac2deef5d51cbe --- /dev/null +++ b/Documentation/devicetree/bindings/rtc/nuvoton,ma35d1-rtc.yaml @@ -0,0 +1,48 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/rtc/nuvoton,ma35d1-rtc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Nuvoton MA35D1 Real Time Clock + +maintainers: + - Min-Jen Chen + +allOf: + - $ref: rtc.yaml# + +properties: + compatible: + enum: + - nuvoton,ma35d1-rtc + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clocks: + maxItems: 1 + +required: + - compatible + - reg + - interrupts + - clocks + +unevaluatedProperties: false + +examples: + - | + #include + #include + rtc@40410000 { + compatible = "nuvoton,ma35d1-rtc"; + reg = <0x40410000 0x200>; + interrupts = ; + clocks = <&clk RTC_GATE>; + }; + +... 
diff --git a/Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml b/Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml index b95a69cc9ae0fef4e4111bade35faa4a7ddb339f..d274bb7a534b55ef83a05da2c2b8446f38342c88 100644 --- a/Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml +++ b/Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml @@ -61,27 +61,27 @@ additionalProperties: false examples: - | + #include #include - spmi_bus: spmi@c440000 { - reg = <0x0c440000 0x1100>; - #address-cells = <2>; - #size-cells = <0>; - pmicintc: pmic@0 { - reg = <0x0 SPMI_USID>; - compatible = "qcom,pm8921"; - interrupts = <104 8>; - #interrupt-cells = <2>; - interrupt-controller; - #address-cells = <1>; + + spmi { + #address-cells = <2>; #size-cells = <0>; - pm8921_rtc: rtc@11d { - compatible = "qcom,pm8921-rtc"; - reg = <0x11d>; - interrupts = <0x27 0>; - nvmem-cells = <&rtc_offset>; - nvmem-cell-names = "offset"; + pmic@0 { + compatible = "qcom,pm8941", "qcom,spmi-pmic"; + reg = <0x0 SPMI_USID>; + #address-cells = <1>; + #size-cells = <0>; + + rtc@6000 { + compatible = "qcom,pm8941-rtc"; + reg = <0x6000>, <0x6100>; + reg-names = "rtc", "alarm"; + interrupts = <0x0 0x61 0x1 IRQ_TYPE_EDGE_RISING>; + nvmem-cells = <&rtc_offset>; + nvmem-cell-names = "offset"; + }; }; - }; }; ... 
diff --git a/Documentation/devicetree/bindings/sound/tas2562.yaml b/Documentation/devicetree/bindings/sound/tas2562.yaml index f01c0dde0cf740e6ff9d500ddedb1e27d320f919..d28c102c0ce7f0fe94577e45b54daa7496331e7f 100644 --- a/Documentation/devicetree/bindings/sound/tas2562.yaml +++ b/Documentation/devicetree/bindings/sound/tas2562.yaml @@ -18,7 +18,6 @@ description: | Specifications about the audio amplifier can be found at: https://www.ti.com/lit/gpn/tas2562 - https://www.ti.com/lit/gpn/tas2563 https://www.ti.com/lit/gpn/tas2564 https://www.ti.com/lit/gpn/tas2110 @@ -29,7 +28,6 @@ properties: compatible: enum: - ti,tas2562 - - ti,tas2563 - ti,tas2564 - ti,tas2110 diff --git a/Documentation/devicetree/bindings/sound/ti,tas2781.yaml b/Documentation/devicetree/bindings/sound/ti,tas2781.yaml index a69e6c223308e637de51d512cb18f210441dcc9e..9762386892495149c00259c59be3e04a3a09d2c6 100644 --- a/Documentation/devicetree/bindings/sound/ti,tas2781.yaml +++ b/Documentation/devicetree/bindings/sound/ti,tas2781.yaml @@ -5,36 +5,46 @@ $id: http://devicetree.org/schemas/sound/ti,tas2781.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Texas Instruments TAS2781 SmartAMP +title: Texas Instruments TAS2563/TAS2781 SmartAMP maintainers: - Shenghao Ding -description: - The TAS2781 is a mono, digital input Class-D audio amplifier - optimized for efficiently driving high peak power into small - loudspeakers. An integrated on-chip DSP supports Texas Instruments - Smart Amp speaker protection algorithm. The integrated speaker - voltage and current sense provides for real time +description: | + The TAS2563/TAS2781 is a mono, digital input Class-D audio + amplifier optimized for efficiently driving high peak power into + small loudspeakers. An integrated on-chip DSP supports Texas + Instruments Smart Amp speaker protection algorithm. The + integrated speaker voltage and current sense provides for real time monitoring of loudspeaker behavior. 
-allOf: - - $ref: dai-common.yaml# + Specifications about the audio amplifier can be found at: + https://www.ti.com/lit/gpn/tas2563 + https://www.ti.com/lit/gpn/tas2781 properties: compatible: - enum: - - ti,tas2781 + description: | + ti,tas2563: 6.1-W Boosted Class-D Audio Amplifier With Integrated + DSP and IV Sense, 16/20/24/32bit stereo I2S or multichannel TDM. + + ti,tas2781: 24-V Class-D Amplifier with Real Time Integrated Speaker + Protection and Audio Processing, 16/20/24/32bit stereo I2S or + multichannel TDM. + oneOf: + - items: + - enum: + - ti,tas2563 + - const: ti,tas2781 + - enum: + - ti,tas2781 reg: description: - I2C address, in multiple tas2781s case, all the i2c address + I2C address, in multiple-AMP case, all the i2c address aggregate as one Audio Device to support multiple audio slots. maxItems: 8 minItems: 1 - items: - minimum: 0x38 - maximum: 0x3f reset-gpios: maxItems: 1 @@ -49,6 +59,44 @@ required: - compatible - reg +allOf: + - $ref: dai-common.yaml# + - if: + properties: + compatible: + contains: + enum: + - ti,tas2563 + then: + properties: + reg: + description: + I2C address, in multiple-AMP case, all the i2c address + aggregate as one Audio Device to support multiple audio slots. + maxItems: 4 + minItems: 1 + items: + minimum: 0x4c + maximum: 0x4f + + - if: + properties: + compatible: + contains: + enum: + - ti,tas2781 + then: + properties: + reg: + description: + I2C address, in multiple-AMP case, all the i2c address + aggregate as one Audio Device to support multiple audio slots. 
+ maxItems: 8 + minItems: 1 + items: + minimum: 0x38 + maximum: 0x3f + additionalProperties: false examples: diff --git a/Documentation/devicetree/bindings/timer/sifive,clint.yaml b/Documentation/devicetree/bindings/timer/sifive,clint.yaml index 4b6c20fc819434883fc68ba79c153f67ac807344..fced6f2d8ecbb35955e3800f19d58980b792a764 100644 --- a/Documentation/devicetree/bindings/timer/sifive,clint.yaml +++ b/Documentation/devicetree/bindings/timer/sifive,clint.yaml @@ -33,6 +33,7 @@ properties: - sifive,fu540-c000-clint # SiFive FU540 - starfive,jh7100-clint # StarFive JH7100 - starfive,jh7110-clint # StarFive JH7110 + - starfive,jh8100-clint # StarFive JH8100 - const: sifive,clint0 # SiFive CLINT v0 IP block - items: - enum: diff --git a/Documentation/devicetree/bindings/timer/thead,c900-aclint-mtimer.yaml b/Documentation/devicetree/bindings/timer/thead,c900-aclint-mtimer.yaml index fbd235650e52cca3dc3e43792e0c730aab65699c..2e92bcdeb423abeca98da6868d5a116615c13bbc 100644 --- a/Documentation/devicetree/bindings/timer/thead,c900-aclint-mtimer.yaml +++ b/Documentation/devicetree/bindings/timer/thead,c900-aclint-mtimer.yaml @@ -17,7 +17,12 @@ properties: - const: thead,c900-aclint-mtimer reg: - maxItems: 1 + items: + - description: MTIMECMP Registers + + reg-names: + items: + - const: mtimecmp interrupts-extended: minItems: 1 @@ -28,6 +33,7 @@ additionalProperties: false required: - compatible - reg + - reg-names - interrupts-extended examples: @@ -39,5 +45,6 @@ examples: <&cpu3intc 7>, <&cpu4intc 7>; reg = <0xac000000 0x00010000>; + reg-names = "mtimecmp"; }; ... diff --git a/Documentation/features/vm/TLB/arch-support.txt b/Documentation/features/vm/TLB/arch-support.txt index 8fd22073a847e9d1bbed3ccc5b9b59a5e71db300..d222bd3ee7495b86f711056aef5ccf5a180c5e20 100644 --- a/Documentation/features/vm/TLB/arch-support.txt +++ b/Documentation/features/vm/TLB/arch-support.txt @@ -20,7 +20,7 @@ | openrisc: | .. 
| | parisc: | TODO | | powerpc: | TODO | - | riscv: | TODO | + | riscv: | ok | | s390: | TODO | | sh: | TODO | | sparc: | TODO | diff --git a/Documentation/filesystems/netfs_library.rst b/Documentation/filesystems/netfs_library.rst index 48b95d04f72d5a25df0de37ac87a52d8f7471998..4cc657d743f7f3b24b9a3a15efd9608ee5775554 100644 --- a/Documentation/filesystems/netfs_library.rst +++ b/Documentation/filesystems/netfs_library.rst @@ -295,7 +295,6 @@ through which it can issue requests and negotiate:: struct netfs_request_ops { void (*init_request)(struct netfs_io_request *rreq, struct file *file); void (*free_request)(struct netfs_io_request *rreq); - int (*begin_cache_operation)(struct netfs_io_request *rreq); void (*expand_readahead)(struct netfs_io_request *rreq); bool (*clamp_length)(struct netfs_io_subrequest *subreq); void (*issue_read)(struct netfs_io_subrequest *subreq); @@ -317,20 +316,6 @@ The operations are as follows: [Optional] This is called as the request is being deallocated so that the filesystem can clean up any state it has attached there. - * ``begin_cache_operation()`` - - [Optional] This is called to ask the network filesystem to call into the - cache (if present) to initialise the caching state for this read. The netfs - library module cannot access the cache directly, so the cache should call - something like fscache_begin_read_operation() to do this. - - The cache gets to store its state in ->cache_resources and must set a table - of operations of its own there (though of a different type). - - This should return 0 on success and an error code otherwise. If an error is - reported, the operation may proceed anyway, just without local caching (only - out of memory and interruption errors cause failure here). 
- * ``expand_readahead()`` [Optional] This is called to allow the filesystem to expand the size of a @@ -460,14 +445,14 @@ When implementing a local cache to be used by the read helpers, two things are required: some way for the network filesystem to initialise the caching for a read request and a table of operations for the helpers to call. -The network filesystem's ->begin_cache_operation() method is called to set up a -cache and this must call into the cache to do the work. If using fscache, for -example, the cache would call:: +To begin a cache operation on an fscache object, the following function is +called:: int fscache_begin_read_operation(struct netfs_io_request *rreq, struct fscache_cookie *cookie); -passing in the request pointer and the cookie corresponding to the file. +passing in the request pointer and the cookie corresponding to the file. This +fills in the cache resources mentioned below. The netfs_io_request object contains a place for the cache to hang its state:: diff --git a/Documentation/filesystems/smb/ksmbd.rst b/Documentation/filesystems/smb/ksmbd.rst index 7bed96d794fc2656d3bb9d69cbca3feef97ea6c8..6b30e43a0d11f49959d700e38fbfe115d71fd40c 100644 --- a/Documentation/filesystems/smb/ksmbd.rst +++ b/Documentation/filesystems/smb/ksmbd.rst @@ -73,15 +73,14 @@ Auto Negotiation Supported. Compound Request Supported. Oplock Cache Mechanism Supported. SMB2 leases(v1 lease) Supported. -Directory leases(v2 lease) Planned for future. +Directory leases(v2 lease) Supported. Multi-credits Supported. NTLM/NTLMv2 Supported. HMAC-SHA256 Signing Supported. Secure negotiate Supported. Signing Update Supported. Pre-authentication integrity Supported. -SMB3 encryption(CCM, GCM) Supported. (CCM and GCM128 supported, GCM256 in - progress) +SMB3 encryption(CCM, GCM) Supported. (CCM/GCM128 and CCM/GCM256 supported) SMB direct(RDMA) Supported. SMB3 Multi-channel Partially Supported. Planned to implement replay/retry mechanisms for future. 
@@ -112,6 +111,10 @@ DCE/RPC support Partially Supported. a few calls(NetShareEnumAll, for Witness protocol e.g.) ksmbd/nfsd interoperability Planned for future. The features that ksmbd support are Leases, Notify, ACLs and Share modes. +SMB3.1.1 Compression Planned for future. +SMB3.1.1 over QUIC Planned for future. +Signing/Encryption over RDMA Planned for future. +SMB3.1.1 GMAC signing support Planned for future. ============================== ================================================= diff --git a/Documentation/process/4.Coding.rst b/Documentation/process/4.Coding.rst index 1f0d81f44e14b25981dbb8c65c972fa9f20b55ce..c2046dec0c2f4065d81953e4164e706cb73d3d2c 100644 --- a/Documentation/process/4.Coding.rst +++ b/Documentation/process/4.Coding.rst @@ -66,6 +66,10 @@ for aligning variables/macros, for reflowing text and other similar tasks. See the file :ref:`Documentation/process/clang-format.rst ` for more details. +Some basic editor settings, such as indentation and line endings, will be +set automatically if you are using an editor that is compatible with +EditorConfig. See the official EditorConfig website for more information: +https://editorconfig.org/ Abstraction layers ****************** diff --git a/Documentation/process/coding-style.rst b/Documentation/process/coding-style.rst index 6db37a46d3059ee8e3fe6c3ee80711b6bff26e0d..c48382c6b47746f57a090d7af838916a419ff481 100644 --- a/Documentation/process/coding-style.rst +++ b/Documentation/process/coding-style.rst @@ -735,6 +735,10 @@ for aligning variables/macros, for reflowing text and other similar tasks. See the file :ref:`Documentation/process/clang-format.rst ` for more details. +Some basic editor settings, such as indentation and line endings, will be +set automatically if you are using an editor that is compatible with +EditorConfig. 
See the official EditorConfig website for more information: +https://editorconfig.org/ 10) Kconfig configuration files ------------------------------- diff --git a/Documentation/rust/arch-support.rst b/Documentation/rust/arch-support.rst index b91e9ef4d0c21e45a4beb27eb8ac9f32a4d6669a..73203ba1e9011e3a5eb5d58f598f00f34b4ece80 100644 --- a/Documentation/rust/arch-support.rst +++ b/Documentation/rust/arch-support.rst @@ -12,10 +12,11 @@ which uses ``libclang``. Below is a general summary of architectures that currently work. Level of support corresponds to ``S`` values in the ``MAINTAINERS`` file. -============ ================ ============================================== -Architecture Level of support Constraints -============ ================ ============================================== -``um`` Maintained ``x86_64`` only. -``x86`` Maintained ``x86_64`` only. -============ ================ ============================================== +============= ================ ============================================== +Architecture Level of support Constraints +============= ================ ============================================== +``loongarch`` Maintained - +``um`` Maintained ``x86_64`` only. +``x86`` Maintained ``x86_64`` only. +============= ================ ============================================== diff --git a/Documentation/trace/ftrace.rst b/Documentation/trace/ftrace.rst index 16122a8895ba30b3b31072e61586a470f1d06ee7..7e7b8ec1793483c4843b2281475d4cf9e300b021 100644 --- a/Documentation/trace/ftrace.rst +++ b/Documentation/trace/ftrace.rst @@ -218,6 +218,27 @@ of ftrace. Here is a list of some of the key files: This displays the total combined size of all the trace buffers. + buffer_subbuf_size_kb: + + This sets or displays the sub buffer size. The ring buffer is broken up + into several same size "sub buffers". An event can not be bigger than + the size of the sub buffer. Normally, the sub buffer is the size of the + architecture's page (4K on x86). 
The sub buffer also contains meta data + at the start which also limits the size of an event. That means when + the sub buffer is a page size, no event can be larger than the page + size minus the sub buffer meta data. + + Note, the buffer_subbuf_size_kb is a way for the user to specify the + minimum size of the subbuffer. The kernel may make it bigger due to the + implementation details, or simply fail the operation if the kernel can + not handle the request. + + Changing the sub buffer size allows for events to be larger than the + page size. + + Note: When changing the sub-buffer size, tracing is stopped and any + data in the ring buffer and the snapshot buffer will be discarded. + free_buffer: If a process is performing tracing, and the ring buffer should be diff --git a/MAINTAINERS b/MAINTAINERS index c8b6db2eca838a720c0ce889a967ed489df1e208..3f9efc2d7be88a7c060b874734a5df84b52c0c80 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -441,6 +441,13 @@ W: http://wiki.analog.com/AD7879 W: https://ez.analog.com/linux-software-drivers F: drivers/input/touchscreen/ad7879.c +ADAFRUIT MINI I2C GAMEPAD +M: Anshul Dalal +L: linux-input@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/input/adafruit,seesaw-gamepad.yaml +F: drivers/input/joystick/adafruit-seesaw.c + ADDRESS SPACE LAYOUT RANDOMIZATION (ASLR) M: Jiri Kosina S: Maintained @@ -3383,9 +3390,8 @@ F: Documentation/devicetree/bindings/iio/adc/avia-hx711.yaml F: drivers/iio/adc/hx711.c AX.25 NETWORK LAYER -M: Ralf Baechle L: linux-hams@vger.kernel.org -S: Maintained +S: Orphan W: https://linux-ax25.in-berlin.de F: include/net/ax25.h F: include/uapi/linux/ax25.h @@ -3624,7 +3630,6 @@ F: drivers/mtd/devices/block2mtd.c BLUETOOTH DRIVERS M: Marcel Holtmann -M: Johan Hedberg M: Luiz Augusto von Dentz L: linux-bluetooth@vger.kernel.org S: Supported @@ -3687,6 +3692,13 @@ L: bpf@vger.kernel.org S: Supported F: arch/arm64/net/ +BPF JIT for LOONGARCH +M: Tiezhu Yang +R: Hengqi Chen +L: bpf@vger.kernel.org 
+S: Maintained +F: arch/loongarch/net/ + BPF JIT for MIPS (32-BIT AND 64-BIT) M: Johan Almbladh M: Paul Burton @@ -4719,11 +4731,8 @@ F: drivers/i2c/busses/i2c-octeon* F: drivers/i2c/busses/i2c-thunderx* CAVIUM LIQUIDIO NETWORK DRIVER -M: Derek Chickles -M: Satanand Burla -M: Felix Manlunas L: netdev@vger.kernel.org -S: Supported +S: Orphan W: http://www.marvell.com F: drivers/net/ethernet/cavium/liquidio/ @@ -5227,7 +5236,7 @@ X: drivers/clk/clkdev.c COMMON INTERNET FILE SYSTEM CLIENT (CIFS and SMB3) M: Steve French R: Paulo Alcantara (DFS, global name space) -R: Ronnie Sahlberg (directory leases, sparse files) +R: Ronnie Sahlberg (directory leases, sparse files) R: Shyam Prasad N (multichannel) R: Tom Talpey (RDMA, smbdirect) L: linux-cifs@vger.kernel.org @@ -5280,6 +5289,7 @@ M: Dan Williams L: linux-cxl@vger.kernel.org S: Maintained F: drivers/cxl/ +F: include/linux/cxl-event.h F: include/uapi/linux/cxl_mem.h F: tools/testing/cxl/ @@ -7945,12 +7955,13 @@ L: rust-for-linux@vger.kernel.org S: Maintained F: rust/kernel/net/phy.rs -EXEC & BINFMT API +EXEC & BINFMT API, ELF R: Eric Biederman R: Kees Cook L: linux-mm@kvack.org S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/execve +F: Documentation/userspace-api/ELF.rst F: fs/*binfmt_*.c F: fs/exec.c F: include/linux/binfmts.h @@ -8211,6 +8222,19 @@ S: Supported F: fs/iomap/ F: include/linux/iomap.h +FILESYSTEMS [NETFS LIBRARY] +M: David Howells +L: linux-cachefs@redhat.com (moderated for non-subscribers) +L: linux-fsdevel@vger.kernel.org +S: Supported +F: Documentation/filesystems/caching/ +F: Documentation/filesystems/netfs_library.rst +F: fs/netfs/ +F: include/linux/fscache*.h +F: include/linux/netfs.h +F: include/trace/events/fscache.h +F: include/trace/events/netfs.h + FILESYSTEMS [STACKABLE] M: Miklos Szeredi M: Amir Goldstein @@ -8656,14 +8680,6 @@ F: Documentation/power/freezing-of-tasks.rst F: include/linux/freezer.h F: kernel/freezer.c -FS-CACHE: LOCAL CACHING FOR 
NETWORK FILESYSTEMS -M: David Howells -L: linux-cachefs@redhat.com (moderated for non-subscribers) -S: Supported -F: Documentation/filesystems/caching/ -F: fs/fscache/ -F: include/linux/fscache*.h - FSCRYPT: FILE SYSTEM LEVEL ENCRYPTION SUPPORT M: Eric Biggers M: Theodore Y. Ts'o @@ -10053,7 +10069,7 @@ F: Documentation/i2c/busses/i2c-parport.rst F: drivers/i2c/busses/i2c-parport.c I2C SUBSYSTEM -M: Wolfram Sang +M: Wolfram Sang L: linux-i2c@vger.kernel.org S: Maintained W: https://i2c.wiki.kernel.org/ @@ -10231,7 +10247,6 @@ IBM Power SRIOV Virtual NIC Device Driver M: Haren Myneni M: Rick Lindsley R: Nick Child -R: Dany Madden R: Thomas Falcon L: netdev@vger.kernel.org S: Supported @@ -12632,6 +12647,13 @@ S: Maintained F: Documentation/devicetree/bindings/gpio/loongson,ls-gpio.yaml F: drivers/gpio/gpio-loongson-64bit.c +LOONGSON LS2X APB DMA DRIVER +M: Binbin Zhou +L: dmaengine@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/dma/loongson,ls2x-apbdma.yaml +F: drivers/dma/ls2x-apb-dma.c + LOONGSON LS2X I2C DRIVER M: Binbin Zhou L: linux-i2c@vger.kernel.org @@ -13002,10 +13024,10 @@ S: Maintained F: drivers/thermal/armada_thermal.c MARVELL MVNETA ETHERNET DRIVER -M: Thomas Petazzoni +M: Marcin Wojtas L: netdev@vger.kernel.org S: Maintained -F: drivers/net/ethernet/marvell/mvneta.* +F: drivers/net/ethernet/marvell/mvneta* MARVELL MVPP2 ETHERNET DRIVER M: Marcin Wojtas @@ -13125,6 +13147,14 @@ F: Documentation/devicetree/bindings/hwmon/adi,max31827.yaml F: Documentation/hwmon/max31827.rst F: drivers/hwmon/max31827.c +MAX31335 RTC DRIVER +M: Antoniu Miclaus +L: linux-rtc@vger.kernel.org +S: Supported +W: https://ez.analog.com/linux-software-drivers +F: Documentation/devicetree/bindings/rtc/adi,max31335.yaml +F: drivers/rtc/rtc-max31335.c + MAX6650 HARDWARE MONITOR AND FAN CONTROLLER DRIVER L: linux-hwmon@vger.kernel.org S: Orphan @@ -13647,7 +13677,6 @@ F: drivers/dma/mediatek/ MEDIATEK ETHERNET DRIVER M: Felix Fietkau -M: John Crispin M: Sean 
Wang M: Mark Lee M: Lorenzo Bianconi @@ -13803,7 +13832,6 @@ F: include/soc/mediatek/smi.h MEDIATEK SWITCH DRIVER M: Arınç ÜNAL M: Daniel Golle -M: Landen Chao M: DENG Qingfang M: Sean Wang L: netdev@vger.kernel.org @@ -14810,6 +14838,13 @@ S: Maintained F: Documentation/driver-api/tty/moxa-smartio.rst F: drivers/tty/mxser.* +MP3309C BACKLIGHT DRIVER +M: Flavio Suligoi +L: dri-devel@lists.freedesktop.org +S: Maintained +F: Documentation/devicetree/bindings/leds/backlight/mps,mp3309c.yaml +F: drivers/video/backlight/mp3309c.c + MR800 AVERMEDIA USB FM RADIO DRIVER M: Alexey Klimov L: linux-media@vger.kernel.org @@ -17120,10 +17155,10 @@ PERFORMANCE EVENTS SUBSYSTEM M: Peter Zijlstra M: Ingo Molnar M: Arnaldo Carvalho de Melo +M: Namhyung Kim R: Mark Rutland R: Alexander Shishkin R: Jiri Olsa -R: Namhyung Kim R: Ian Rogers R: Adrian Hunter L: linux-perf-users@vger.kernel.org @@ -18443,7 +18478,6 @@ X: include/linux/srcu*.h X: kernel/rcu/srcu*.c REAL TIME CLOCK (RTC) SUBSYSTEM -M: Alessandro Zummo M: Alexandre Belloni L: linux-rtc@vger.kernel.org S: Maintained @@ -19131,7 +19165,7 @@ F: drivers/iommu/s390-iommu.c S390 IUCV NETWORK LAYER M: Alexandra Winter -M: Wenjia Zhang +M: Thorsten Winkler L: linux-s390@vger.kernel.org L: netdev@vger.kernel.org S: Supported @@ -19150,7 +19184,7 @@ F: arch/s390/mm S390 NETWORK DRIVERS M: Alexandra Winter -M: Wenjia Zhang +M: Thorsten Winkler L: linux-s390@vger.kernel.org L: netdev@vger.kernel.org S: Supported @@ -23009,6 +23043,7 @@ M: Alex Williamson L: kvm@vger.kernel.org S: Maintained T: git https://github.com/awilliam/linux-vfio.git +F: Documentation/ABI/testing/debugfs-vfio F: Documentation/ABI/testing/sysfs-devices-vfio-dev F: Documentation/driver-api/vfio.rst F: drivers/vfio/ @@ -23044,6 +23079,13 @@ L: kvm@vger.kernel.org S: Maintained F: drivers/vfio/pci/mlx5/ +VFIO VIRTIO PCI DRIVER +M: Yishai Hadas +L: kvm@vger.kernel.org +L: virtualization@lists.linux-foundation.org +S: Maintained +F: drivers/vfio/pci/virtio + VFIO PCI 
DEVICE SPECIFIC DRIVERS R: Jason Gunthorpe R: Yishai Hadas diff --git a/Makefile b/Makefile index c029ec7efde753f6ef55c2598881fd37a53ab9da..dbebcd4b95f479d5d359b092db1eee36be86efe9 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 -PATCHLEVEL = 7 +PATCHLEVEL = 8 SUBLEVEL = 0 -EXTRAVERSION = +EXTRAVERSION = -rc1 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* @@ -173,6 +173,15 @@ export KBUILD_MIXED_TREE # TODO(b/205893923): Revert this hack once it is properly handled. export mixed-build-prefix +# backward compatibility +KBUILD_EXTRA_WARN ?= $(KBUILD_ENABLE_EXTRA_GCC_CHECKS) + +ifeq ("$(origin W)", "command line") + KBUILD_EXTRA_WARN := $(W) +endif + +export KBUILD_EXTRA_WARN + # Kbuild will save output files in the current working directory. # This does not need to match to the root of the kernel source tree. # @@ -199,14 +208,11 @@ ifeq ("$(origin O)", "command line") endif ifneq ($(KBUILD_OUTPUT),) -# Make's built-in functions such as $(abspath ...), $(realpath ...) cannot -# expand a shell special character '~'. We use a somewhat tedious way here. -abs_objtree := $(shell mkdir -p $(KBUILD_OUTPUT) && cd $(KBUILD_OUTPUT) && pwd) -$(if $(abs_objtree),, \ - $(error failed to create output directory "$(KBUILD_OUTPUT)")) - +# $(realpath ...) gets empty if the path does not exist. Run 'mkdir -p' first. +$(shell mkdir -p "$(KBUILD_OUTPUT)") # $(realpath ...) 
resolves symlinks -abs_objtree := $(realpath $(abs_objtree)) +abs_objtree := $(realpath $(KBUILD_OUTPUT)) +$(if $(abs_objtree),,$(error failed to create output directory "$(KBUILD_OUTPUT)")) endif # ifneq ($(KBUILD_OUTPUT),) ifneq ($(words $(subst :, ,$(abs_srctree))), 1) @@ -627,8 +633,6 @@ export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL KBUILD_RUSTFLAGS_KERNEL export RCS_FIND_IGNORE := \( -name SCCS -o -name BitKeeper -o -name .svn -o \ -name CVS -o -name .pc -o -name .hg -o -name .git \) \ -prune -o -export RCS_TAR_IGNORE := --exclude SCCS --exclude BitKeeper --exclude .svn \ - --exclude CVS --exclude .pc --exclude .hg --exclude .git # =========================================================================== # Rules shared between *config targets and build targets @@ -1009,6 +1013,10 @@ NOSTDINC_FLAGS += -nostdinc # perform bounds checking. KBUILD_CFLAGS += $(call cc-option, -fstrict-flex-arrays=3) +#Currently, disable -Wstringop-overflow for GCC 11, globally. +KBUILD_CFLAGS-$(CONFIG_CC_NO_STRINGOP_OVERFLOW) += $(call cc-option, -Wno-stringop-overflow) +KBUILD_CFLAGS-$(CONFIG_CC_STRINGOP_OVERFLOW) += $(call cc-option, -Wstringop-overflow) + # disable invalid "can't wrap" optimizations for signed / pointers KBUILD_CFLAGS += -fno-strict-overflow @@ -1701,6 +1709,7 @@ help: @echo ' 1: warnings which may be relevant and do not occur too often' @echo ' 2: warnings which occur quite often but may still be relevant' @echo ' 3: more obscure warnings, can most likely be ignored' + @echo ' c: extra checks in the configuration stage (Kconfig)' @echo ' e: warnings are being treated as errors' @echo ' Multiple levels can be combined with W=12 or W=123' @$(if $(dtstree), \ diff --git a/arch/Kconfig b/arch/Kconfig index 5ca66aad0d0812f831a717561777273be7db9a9a..c91917b508736d1fa0d37d5bf3b1e4bf5550e211 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -301,6 +301,11 @@ config ARCH_HAS_DMA_CLEAR_UNCACHED config ARCH_HAS_CPU_FINALIZE_INIT bool +# The architecture has a per-task 
state that includes the mm's PASID +config ARCH_HAS_CPU_PASID + bool + select IOMMU_MM_DATA + config HAVE_ARCH_THREAD_STRUCT_WHITELIST bool help diff --git a/arch/alpha/kernel/rtc.c b/arch/alpha/kernel/rtc.c index fb3025396ac96477c497e966053a366d9e05200c..cfdf90bc8b3f862659726f09d167b301922c75bd 100644 --- a/arch/alpha/kernel/rtc.c +++ b/arch/alpha/kernel/rtc.c @@ -80,7 +80,7 @@ init_rtc_epoch(void) static int alpha_rtc_read_time(struct device *dev, struct rtc_time *tm) { - int ret = mc146818_get_time(tm); + int ret = mc146818_get_time(tm, 10); if (ret < 0) { dev_err_ratelimited(dev, "unable to read current time\n"); diff --git a/arch/arc/include/asm/cacheflush.h b/arch/arc/include/asm/cacheflush.h index 563af3e75f01f2acb576110ef3210922a8dfb672..329c94cd45d8f68b3fa7866f92a6367a59f23530 100644 --- a/arch/arc/include/asm/cacheflush.h +++ b/arch/arc/include/asm/cacheflush.h @@ -40,6 +40,7 @@ void dma_cache_wback(phys_addr_t start, unsigned long sz); /* TBD: optimize this */ #define flush_cache_vmap(start, end) flush_cache_all() +#define flush_cache_vmap_early(start, end) do { } while (0) #define flush_cache_vunmap(start, end) flush_cache_all() #define flush_cache_dup_mm(mm) /* called on fork (VIVT only) */ diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index 2a7fbbb83b7056e976fa446995d5bf33ecbf4764..197707bc7658898843d78bee7c0f02a10f7d26b9 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -91,7 +91,7 @@ void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, * Plug in direct dma map ops. 
*/ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - const struct iommu_ops *iommu, bool coherent) + bool coherent) { /* * IOC hardware snoops all DMA traffic keeping the caches consistent diff --git a/arch/arm/configs/mxs_defconfig b/arch/arm/configs/mxs_defconfig index feb38a94c1a70a49c3d3d997957d493585bff54f..43bc1255a5db9f4ed8f65a98a69284e1439e44d2 100644 --- a/arch/arm/configs/mxs_defconfig +++ b/arch/arm/configs/mxs_defconfig @@ -138,7 +138,8 @@ CONFIG_PWM_MXS=y CONFIG_NVMEM_MXS_OCOTP=y CONFIG_EXT4_FS=y # CONFIG_DNOTIFY is not set -CONFIG_FSCACHE=m +CONFIG_NETFS_SUPPORT=m +CONFIG_FSCACHE=y CONFIG_FSCACHE_STATS=y CONFIG_CACHEFILES=m CONFIG_VFAT_FS=y diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index f6181f69577fe538dc4a13a85cc479784ad9ebef..1075534b0a2eeba73be7d0e83b9d0c995e1f8cf9 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -340,6 +340,8 @@ static inline void flush_cache_vmap(unsigned long start, unsigned long end) dsb(ishst); } +#define flush_cache_vmap_early(start, end) do { } while (0) + static inline void flush_cache_vunmap(unsigned long start, unsigned long end) { if (!cache_is_vipt_nonaliasing()) diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c index cfd9c933d2f09c6a80d86c9b50959728498cb37a..b94850b579952aefacbd1710bc3c317b4c4b77c9 100644 --- a/arch/arm/mm/dma-mapping-nommu.c +++ b/arch/arm/mm/dma-mapping-nommu.c @@ -34,7 +34,7 @@ void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, } void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - const struct iommu_ops *iommu, bool coherent) + bool coherent) { if (IS_ENABLED(CONFIG_CPU_V7M)) { /* diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index d688eac6dbc147db837cbffce186fb8c971b4df0..f68db05eba29fdaebb7d7cb8d9ec071e4f5f2910 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -1710,7 +1710,7 @@ void 
arm_iommu_detach_device(struct device *dev) EXPORT_SYMBOL_GPL(arm_iommu_detach_device); static void arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size, - const struct iommu_ops *iommu, bool coherent) + bool coherent) { struct dma_iommu_mapping *mapping; @@ -1745,7 +1745,7 @@ static void arm_teardown_iommu_dma_ops(struct device *dev) #else static void arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size, - const struct iommu_ops *iommu, bool coherent) + bool coherent) { } @@ -1754,7 +1754,7 @@ static void arm_teardown_iommu_dma_ops(struct device *dev) { } #endif /* CONFIG_ARM_DMA_USE_IOMMU */ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - const struct iommu_ops *iommu, bool coherent) + bool coherent) { /* * Due to legacy code that sets the ->dma_coherent flag from a bus @@ -1773,8 +1773,8 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, if (dev->dma_ops) return; - if (iommu) - arm_setup_iommu_dma_ops(dev, dma_base, size, iommu, coherent); + if (device_iommu_mapped(dev)) + arm_setup_iommu_dma_ops(dev, dma_base, size, coherent); xen_setup_dma_ops(dev); dev->archdata.dma_ops_setup = true; diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index ea01a2c43efaf6fd1fd1d19529d40279c4c91102..aa7c1d435139684d7b56f96f3f93945d331d64d6 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1039,8 +1039,12 @@ config ARM64_ERRATUM_2645198 If unsure, say Y. +config ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD + bool + config ARM64_ERRATUM_2966298 bool "Cortex-A520: 2966298: workaround for speculatively executed unprivileged load" + select ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD default y help This option adds the workaround for ARM Cortex-A520 erratum 2966298. @@ -1052,6 +1056,20 @@ config ARM64_ERRATUM_2966298 If unsure, say Y. 
+config ARM64_ERRATUM_3117295 + bool "Cortex-A510: 3117295: workaround for speculatively executed unprivileged load" + select ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD + default y + help + This option adds the workaround for ARM Cortex-A510 erratum 3117295. + + On an affected Cortex-A510 core, a speculatively executed unprivileged + load might leak data from a privileged level via a cache side channel. + + Work around this problem by executing a TLBI before returning to EL0. + + If unsure, say Y. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 7b1975bf4b90e7a999de178140bc92aeb63f1c02..513787e4332993e18ec82db1a47f7814ca553d4c 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -760,32 +760,25 @@ alternative_endif .endm /* - * Check whether preempt/bh-disabled asm code should yield as soon as - * it is able. This is the case if we are currently running in task - * context, and either a softirq is pending, or the TIF_NEED_RESCHED - * flag is set and re-enabling preemption a single time would result in - * a preempt count of zero. (Note that the TIF_NEED_RESCHED flag is - * stored negated in the top word of the thread_info::preempt_count + * Check whether asm code should yield as soon as it is able. This is + * the case if we are currently running in task context, and the + * TIF_NEED_RESCHED flag is set. (Note that the TIF_NEED_RESCHED flag + * is stored negated in the top word of the thread_info::preempt_count * field) */ - .macro cond_yield, lbl:req, tmp:req, tmp2:req + .macro cond_yield, lbl:req, tmp:req, tmp2 +#ifdef CONFIG_PREEMPT_VOLUNTARY get_current_task \tmp ldr \tmp, [\tmp, #TSK_TI_PREEMPT] /* * If we are serving a softirq, there is no point in yielding: the * softirq will not be preempted no matter what we do, so we should - * run to completion as quickly as we can. 
+ * run to completion as quickly as we can. The preempt_count field will + * have BIT(SOFTIRQ_SHIFT) set in this case, so the zero check will + * catch this case too. */ - tbnz \tmp, #SOFTIRQ_SHIFT, .Lnoyield_\@ -#ifdef CONFIG_PREEMPTION - sub \tmp, \tmp, #PREEMPT_DISABLE_OFFSET cbz \tmp, \lbl #endif - adr_l \tmp, irq_stat + IRQ_CPUSTAT_SOFTIRQ_PENDING - get_this_cpu_offset \tmp2 - ldr w\tmp, [\tmp, \tmp2] - cbnz w\tmp, \lbl // yield on pending softirq in task context -.Lnoyield_\@: .endm /* diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index 50ce8b697ff361be7ee7fe571144b6a9e12ad374..e93548914c366f3476aea04c3bc35ff4b92c083d 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -4,6 +4,8 @@ #ifndef __ASSEMBLER__ +#include + #include void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu); diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index d95b3d6b471a7d63957c47151fd6cb404ca0f4c7..e5d03a7039b4bf9cce893b1ea39712eef3e2f4ad 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -73,7 +73,13 @@ obj-$(CONFIG_ARM64_MTE) += mte.o obj-y += vdso-wrap.o obj-$(CONFIG_COMPAT_VDSO) += vdso32-wrap.o obj-$(CONFIG_UNWIND_PATCH_PAC_INTO_SCS) += patch-scs.o -CFLAGS_patch-scs.o += -mbranch-protection=none + +# We need to prevent the SCS patching code from patching itself. Using +# -mbranch-protection=none here to avoid the patchable PAC opcodes from being +# generated triggers an issue with full LTO on Clang, which stops emitting PAC +# instructions altogether. So instead, omit the unwind tables used by the +# patching code, so it will not be able to locate its own PAC instructions. 
+CFLAGS_patch-scs.o += -fno-asynchronous-unwind-tables -fno-unwind-tables # Force dependency (vdso*-wrap.S includes vdso.so through incbin) $(obj)/vdso-wrap.o: $(obj)/vdso/vdso.so diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 5ff1942b04fcfd94e334b6b204f7f3885647c68d..5a7dbbe0ce639a8b7a74012c957b3c7335e8f160 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -117,8 +117,6 @@ int main(void) DEFINE(DMA_FROM_DEVICE, DMA_FROM_DEVICE); BLANK(); DEFINE(PREEMPT_DISABLE_OFFSET, PREEMPT_DISABLE_OFFSET); - DEFINE(SOFTIRQ_SHIFT, SOFTIRQ_SHIFT); - DEFINE(IRQ_CPUSTAT_SOFTIRQ_PENDING, offsetof(irq_cpustat_t, __softirq_pending)); BLANK(); DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task)); BLANK(); diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index e29e0fea63fb626bea3abd2ad707b3eb08df7f5b..967c7c7a4e7db3db7e3d05a7637e8e7d13e0d273 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -416,6 +416,19 @@ static struct midr_range broken_aarch32_aes[] = { }; #endif /* CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE */ +#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD +static const struct midr_range erratum_spec_unpriv_load_list[] = { +#ifdef CONFIG_ARM64_ERRATUM_3117295 + MIDR_ALL_VERSIONS(MIDR_CORTEX_A510), +#endif +#ifdef CONFIG_ARM64_ERRATUM_2966298 + /* Cortex-A520 r0p0 to r0p1 */ + MIDR_REV_RANGE(MIDR_CORTEX_A520, 0, 0, 1), +#endif + {}, +}; +#endif + const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE { @@ -713,12 +726,12 @@ const struct arm64_cpu_capabilities arm64_errata[] = { MIDR_FIXED(MIDR_CPU_VAR_REV(1,1), BIT(25)), }, #endif -#ifdef CONFIG_ARM64_ERRATUM_2966298 +#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD { - .desc = "ARM erratum 2966298", - .capability = ARM64_WORKAROUND_2966298, + .desc = "ARM errata 2966298, 3117295", + .capability = 
ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD, /* Cortex-A520 r0p0 - r0p1 */ - ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A520, 0, 0, 1), + ERRATA_MIDR_RANGE_LIST(erratum_spec_unpriv_load_list), }, #endif #ifdef CONFIG_AMPERE_ERRATUM_AC03_CPU_38 diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index a6030913cd58c44f1ce5cd7077fe61dac02c86db..7ef0e127b149fcb68ce4aaf83e1403c0648f289f 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -428,16 +428,9 @@ alternative_else_nop_endif ldp x28, x29, [sp, #16 * 14] .if \el == 0 -alternative_if ARM64_WORKAROUND_2966298 - tlbi vale1, xzr - dsb nsh -alternative_else_nop_endif -alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0 - ldr lr, [sp, #S_LR] - add sp, sp, #PT_REGS_SIZE // restore sp - eret -alternative_else_nop_endif #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + alternative_insn "b .L_skip_tramp_exit_\@", nop, ARM64_UNMAP_KERNEL_AT_EL0 + msr far_el1, x29 ldr_this_cpu x30, this_cpu_vector, x29 @@ -446,16 +439,26 @@ alternative_else_nop_endif ldr lr, [sp, #S_LR] // restore x30 add sp, sp, #PT_REGS_SIZE // restore sp br x29 + +.L_skip_tramp_exit_\@: #endif - .else + .endif + ldr lr, [sp, #S_LR] add sp, sp, #PT_REGS_SIZE // restore sp + .if \el == 0 + /* This must be after the last explicit memory access */ +alternative_if ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD + tlbi vale1, xzr + dsb nsh +alternative_else_nop_endif + .else /* Ensure any device/NC reads complete */ alternative_insn nop, "dmb sy", ARM64_WORKAROUND_1508412 + .endif eret - .endif sb .endm diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 505f389be3e0df09d5afa2c803a7e8b235d02e2a..a5dc6f764195847251dc25c196304cbef44d8850 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -898,10 +898,8 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type, * allocate SVE now in case it is needed for use in streaming * mode. 
*/ - if (system_supports_sve()) { - sve_free(task); - sve_alloc(task, true); - } + sve_free(task); + sve_alloc(task, true); if (free_sme) sme_free(task); @@ -1219,8 +1217,10 @@ void fpsimd_release_task(struct task_struct *dead_task) */ void sme_alloc(struct task_struct *task, bool flush) { - if (task->thread.sme_state && flush) { - memset(task->thread.sme_state, 0, sme_state_size(task)); + if (task->thread.sme_state) { + if (flush) + memset(task->thread.sme_state, 0, + sme_state_size(task)); return; } diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 09bb7fc7d3c2513b3bc8cb2e5545f8c31935e30a..dc6cf0e37194e428519d7d58524ad0f624f4bebb 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1108,12 +1108,13 @@ static int za_set(struct task_struct *target, } } - /* Allocate/reinit ZA storage */ - sme_alloc(target, true); - if (!target->thread.sme_state) { - ret = -ENOMEM; - goto out; - } + /* + * Only flush the storage if PSTATE.ZA was not already set, + * otherwise preserve any existing data. 
+ */ + sme_alloc(target, !thread_za_enabled(&target->thread)); + if (!target->thread.sme_state) + return -ENOMEM; /* If there is no data then disable ZA */ if (!count) { diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 0d779208e702de4dc0ef8b0e363730a34850a0ad..9f395ce68470fdeeaabfe839ba8d947a4727c1bf 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -48,7 +48,7 @@ void arch_teardown_dma_ops(struct device *dev) #endif void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - const struct iommu_ops *iommu, bool coherent) + bool coherent) { int cls = cache_line_size_of_cpu(); @@ -59,7 +59,7 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, ARCH_DMA_MINALIGN, cls); dev->dma_coherent = coherent; - if (iommu) { + if (device_iommu_mapped(dev)) { iommu_setup_dma_ops(dev, dma_base, dma_base + size - 1); trace_android_rvh_iommu_setup_dma_ops(dev, dma_base, dma_base + size - 1); } diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 1e07d74d7a6c93baa2a01e7f63a5801c5fe3da0d..b912b1409fc09aaf08705b8d75b5d221ae0d020e 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -84,7 +84,6 @@ WORKAROUND_2077057 WORKAROUND_2457168 WORKAROUND_2645198 WORKAROUND_2658417 -WORKAROUND_2966298 WORKAROUND_AMPERE_AC03_CPU_38 WORKAROUND_TRBE_OVERWRITE_FILL_MODE WORKAROUND_TSB_FLUSH_FAILURE @@ -100,3 +99,4 @@ WORKAROUND_NVIDIA_CARMEL_CNP WORKAROUND_QCOM_FALKOR_E1003 WORKAROUND_REPEAT_TLBI WORKAROUND_SPECULATIVE_AT +WORKAROUND_SPECULATIVE_UNPRIV_LOAD diff --git a/arch/csky/abiv1/inc/abi/cacheflush.h b/arch/csky/abiv1/inc/abi/cacheflush.h index 908d8b0bc4fdc645f6ff5b06965df704a97f49cf..d011a81575d21e08a85621e8a58c59ba8e5e7dd8 100644 --- a/arch/csky/abiv1/inc/abi/cacheflush.h +++ b/arch/csky/abiv1/inc/abi/cacheflush.h @@ -43,6 +43,7 @@ static inline void flush_anon_page(struct vm_area_struct *vma, */ extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, 
unsigned long end); #define flush_cache_vmap(start, end) cache_wbinv_all() +#define flush_cache_vmap_early(start, end) do { } while (0) #define flush_cache_vunmap(start, end) cache_wbinv_all() #define flush_icache_range(start, end) cache_wbinv_range(start, end) diff --git a/arch/csky/abiv2/inc/abi/cacheflush.h b/arch/csky/abiv2/inc/abi/cacheflush.h index 40be16907267d673581278512742567213187a38..6513ac5d257888fbd41385c9263305dfefd18de6 100644 --- a/arch/csky/abiv2/inc/abi/cacheflush.h +++ b/arch/csky/abiv2/inc/abi/cacheflush.h @@ -41,6 +41,7 @@ void flush_icache_mm_range(struct mm_struct *mm, void flush_icache_deferred(struct mm_struct *mm); #define flush_cache_vmap(start, end) do { } while (0) +#define flush_cache_vmap_early(start, end) do { } while (0) #define flush_cache_vunmap(start, end) do { } while (0) #define copy_to_user_page(vma, page, vaddr, dst, src, len) \ diff --git a/arch/csky/configs/defconfig b/arch/csky/configs/defconfig index af722e4dfb47d8239c969b25834e9b231a9b4244..ff559e5162aa1cad8da170adc5d047672f849222 100644 --- a/arch/csky/configs/defconfig +++ b/arch/csky/configs/defconfig @@ -34,7 +34,8 @@ CONFIG_GENERIC_PHY=y CONFIG_EXT4_FS=y CONFIG_FANOTIFY=y CONFIG_QUOTA=y -CONFIG_FSCACHE=m +CONFIG_NETFS_SUPPORT=m +CONFIG_FSCACHE=y CONFIG_FSCACHE_STATS=y CONFIG_CACHEFILES=m CONFIG_MSDOS_FS=y diff --git a/arch/loongarch/Kbuild b/arch/loongarch/Kbuild index beb8499dd8ed84330beecbcd61977df0aa3474f8..bfa21465d83afcd4908cc000e49a8c47aea0d165 100644 --- a/arch/loongarch/Kbuild +++ b/arch/loongarch/Kbuild @@ -4,6 +4,7 @@ obj-y += net/ obj-y += vdso/ obj-$(CONFIG_KVM) += kvm/ +obj-$(CONFIG_BUILTIN_DTB) += boot/dts/ # for cleaning subdir- += boot diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 15d05dd2b7f3de828c2aedfec7aa7fd917ddb300..10959e6c3583255264aef0ea7de6e6477a003418 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -142,6 +142,7 @@ config LOONGARCH select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RETHOOK select 
HAVE_RSEQ + select HAVE_RUST select HAVE_SAMPLE_FTRACE_DIRECT select HAVE_SAMPLE_FTRACE_DIRECT_MULTI select HAVE_SETUP_PER_CPU_AREA if NUMA @@ -376,6 +377,24 @@ config CMDLINE_FORCE endchoice +config BUILTIN_DTB + bool "Enable built-in dtb in kernel" + depends on OF + help + Some existing systems do not provide a canonical device tree to + the kernel at boot time. Let's provide a device tree table in the + kernel, keyed by the dts filename, containing the relevant DTBs. + + Built-in DTBs are generic enough and can be used as references. + +config BUILTIN_DTB_NAME + string "Source file for built-in dtb" + depends on BUILTIN_DTB + help + Base name (without suffix, relative to arch/loongarch/boot/dts/) + for the DTS file that will be used to produce the DTB linked into + the kernel. + config DMI bool "Enable DMI scanning" select DMI_SCAN_MACHINE_NON_EFI_FALLBACK @@ -577,6 +596,9 @@ config ARCH_SELECTS_CRASH_DUMP depends on CRASH_DUMP select RELOCATABLE +config ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION + def_bool CRASH_CORE + config RELOCATABLE bool "Relocatable kernel" help diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index 4ba8d67ddb097743be4e68493604579142eee2d5..983aa2b1629a69fe74c4c8358d094fbd424283b9 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -6,6 +6,7 @@ boot := arch/loongarch/boot KBUILD_DEFCONFIG := loongson3_defconfig +KBUILD_DTBS := dtbs image-name-y := vmlinux image-name-$(CONFIG_EFI_ZBOOT) := vmlinuz @@ -81,8 +82,11 @@ KBUILD_AFLAGS_MODULE += -Wa,-mla-global-with-abs KBUILD_CFLAGS_MODULE += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs endif +KBUILD_RUSTFLAGS_MODULE += -Crelocation-model=pic + ifeq ($(CONFIG_RELOCATABLE),y) KBUILD_CFLAGS_KERNEL += -fPIE +KBUILD_RUSTFLAGS_KERNEL += -Crelocation-model=pie LDFLAGS_vmlinux += -static -pie --no-dynamic-linker -z notext $(call ld-option, --apply-dynamic-relocs) endif @@ -141,7 +145,7 @@ endif vdso-install-y += arch/loongarch/vdso/vdso.so.dbg -all: $(notdir 
$(KBUILD_IMAGE)) +all: $(notdir $(KBUILD_IMAGE)) $(KBUILD_DTBS) vmlinuz.efi: vmlinux.efi diff --git a/arch/loongarch/boot/dts/Makefile b/arch/loongarch/boot/dts/Makefile index 5f1f55e911adf543ab5c113b06f81488ee984e59..747d0c3f63892926757b28fc29119069767e2310 100644 --- a/arch/loongarch/boot/dts/Makefile +++ b/arch/loongarch/boot/dts/Makefile @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -dtstree := $(srctree)/$(src) -dtb-y := $(patsubst $(dtstree)/%.dts,%.dtb, $(wildcard $(dtstree)/*.dts)) +dtb-y = loongson-2k0500-ref.dtb loongson-2k1000-ref.dtb loongson-2k2000-ref.dtb + +obj-$(CONFIG_BUILTIN_DTB) += $(addsuffix .dtb.o, $(CONFIG_BUILTIN_DTB_NAME)) diff --git a/arch/loongarch/boot/dts/loongson-2k0500-ref.dts b/arch/loongarch/boot/dts/loongson-2k0500-ref.dts new file mode 100644 index 0000000000000000000000000000000000000000..b38071a4d0b023c7faf29935d3bb6d5e0c65ca76 --- /dev/null +++ b/arch/loongarch/boot/dts/loongson-2k0500-ref.dts @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023 Loongson Technology Corporation Limited + */ + +/dts-v1/; + +#include "loongson-2k0500.dtsi" + +/ { + compatible = "loongson,ls2k0500-ref", "loongson,ls2k0500"; + model = "Loongson-2K0500 Reference Board"; + + aliases { + ethernet0 = &gmac0; + ethernet1 = &gmac1; + serial0 = &uart0; + }; + + chosen { + stdout-path = "serial0:115200n8"; + }; + + memory@200000 { + device_type = "memory"; + reg = <0x0 0x00200000 0x0 0x0ee00000>, + <0x0 0x90000000 0x0 0x60000000>; + }; + + reserved-memory { + #address-cells = <2>; + #size-cells = <2>; + ranges; + + linux,cma { + compatible = "shared-dma-pool"; + reusable; + size = <0x0 0x2000000>; + linux,cma-default; + }; + }; +}; + +&gmac0 { + status = "okay"; + + phy-mode = "rgmii"; + bus_id = <0x0>; +}; + +&gmac1 { + status = "okay"; + + phy-mode = "rgmii"; + bus_id = <0x1>; +}; + +&i2c0 { + status = "okay"; + + #address-cells = <1>; + #size-cells = <0>; + eeprom@57{ + compatible = "atmel,24c16"; + reg = <0x57>; + 
pagesize = <16>; + }; +}; + +&ehci0 { + status = "okay"; +}; + +&ohci0 { + status = "okay"; +}; + +&sata { + status = "okay"; +}; + +&uart0 { + status = "okay"; +}; + +&rtc0 { + status = "okay"; +}; diff --git a/arch/loongarch/boot/dts/loongson-2k0500.dtsi b/arch/loongarch/boot/dts/loongson-2k0500.dtsi new file mode 100644 index 0000000000000000000000000000000000000000..444779c21034b5aecd2faa6129ff3dd0dd670fe1 --- /dev/null +++ b/arch/loongarch/boot/dts/loongson-2k0500.dtsi @@ -0,0 +1,266 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023 Loongson Technology Corporation Limited + */ + +/dts-v1/; + +#include + +/ { + #address-cells = <2>; + #size-cells = <2>; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu0: cpu@0 { + compatible = "loongson,la264"; + device_type = "cpu"; + reg = <0x0>; + clocks = <&cpu_clk>; + }; + }; + + cpu_clk: cpu-clk { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <500000000>; + }; + + cpuintc: interrupt-controller { + compatible = "loongson,cpu-interrupt-controller"; + #interrupt-cells = <1>; + interrupt-controller; + }; + + bus@10000000 { + compatible = "simple-bus"; + ranges = <0x0 0x10000000 0x0 0x10000000 0x0 0x10000000>, + <0x0 0x02000000 0x0 0x02000000 0x0 0x02000000>, + <0x0 0x20000000 0x0 0x20000000 0x0 0x10000000>, + <0x0 0x40000000 0x0 0x40000000 0x0 0x40000000>, + <0xfe 0x0 0xfe 0x0 0x0 0x40000000>; + #address-cells = <2>; + #size-cells = <2>; + + isa@16400000 { + compatible = "isa"; + #size-cells = <1>; + #address-cells = <2>; + ranges = <1 0x0 0x0 0x16400000 0x4000>; + }; + + liointc0: interrupt-controller@1fe11400 { + compatible = "loongson,liointc-2.0"; + reg = <0x0 0x1fe11400 0x0 0x40>, + <0x0 0x1fe11040 0x0 0x8>; + reg-names = "main", "isr0"; + + interrupt-controller; + #interrupt-cells = <2>; + interrupt-parent = <&cpuintc>; + interrupts = <2>; + interrupt-names = "int0"; + + loongson,parent_int_map = <0xffffffff>, /* int0 */ + <0x00000000>, /* int1 */ + <0x00000000>, 
/* int2 */ + <0x00000000>; /* int3 */ + }; + + liointc1: interrupt-controller@1fe11440 { + compatible = "loongson,liointc-2.0"; + reg = <0x0 0x1fe11440 0x0 0x40>, + <0x0 0x1fe11048 0x0 0x8>; + reg-names = "main", "isr0"; + + interrupt-controller; + #interrupt-cells = <2>; + interrupt-parent = <&cpuintc>; + interrupts = <4>; + interrupt-names = "int2"; + + loongson,parent_int_map = <0x00000000>, /* int0 */ + <0x00000000>, /* int1 */ + <0xffffffff>, /* int2 */ + <0x00000000>; /* int3 */ + }; + + eiointc: interrupt-controller@1fe11600 { + compatible = "loongson,ls2k0500-eiointc"; + reg = <0x0 0x1fe11600 0x0 0xea00>; + interrupt-controller; + #interrupt-cells = <1>; + interrupt-parent = <&cpuintc>; + interrupts = <3>; + }; + + gmac0: ethernet@1f020000 { + compatible = "snps,dwmac-3.70a"; + reg = <0x0 0x1f020000 0x0 0x10000>; + interrupt-parent = <&liointc0>; + interrupts = <12 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "macirq"; + status = "disabled"; + }; + + gmac1: ethernet@1f030000 { + compatible = "snps,dwmac-3.70a"; + reg = <0x0 0x1f030000 0x0 0x10000>; + interrupt-parent = <&liointc0>; + interrupts = <14 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "macirq"; + status = "disabled"; + }; + + sata: sata@1f040000 { + compatible = "snps,spear-ahci"; + reg = <0x0 0x1f040000 0x0 0x10000>; + interrupt-parent = <&eiointc>; + interrupts = <75>; + status = "disabled"; + }; + + ehci0: usb@1f050000 { + compatible = "generic-ehci"; + reg = <0x0 0x1f050000 0x0 0x8000>; + interrupt-parent = <&eiointc>; + interrupts = <71>; + status = "disabled"; + }; + + ohci0: usb@1f058000 { + compatible = "generic-ohci"; + reg = <0x0 0x1f058000 0x0 0x8000>; + interrupt-parent = <&eiointc>; + interrupts = <72>; + status = "disabled"; + }; + + uart0: serial@1ff40800 { + compatible = "ns16550a"; + reg = <0x0 0x1ff40800 0x0 0x10>; + clock-frequency = <100000000>; + interrupt-parent = <&eiointc>; + interrupts = <2>; + no-loopback-test; + status = "disabled"; + }; + + i2c0: i2c@1ff48000 { + compatible = 
"loongson,ls2k-i2c"; + reg = <0x0 0x1ff48000 0x0 0x0800>; + interrupt-parent = <&eiointc>; + interrupts = <14>; + status = "disabled"; + }; + + i2c@1ff48800 { + compatible = "loongson,ls2k-i2c"; + reg = <0x0 0x1ff48800 0x0 0x0800>; + interrupt-parent = <&eiointc>; + interrupts = <15>; + status = "disabled"; + }; + + i2c@1ff49000 { + compatible = "loongson,ls2k-i2c"; + reg = <0x0 0x1ff49000 0x0 0x0800>; + interrupt-parent = <&eiointc>; + interrupts = <16>; + status = "disabled"; + }; + + i2c@1ff49800 { + compatible = "loongson,ls2k-i2c"; + reg = <0x0 0x1ff49800 0x0 0x0800>; + interrupt-parent = <&eiointc>; + interrupts = <17>; + status = "disabled"; + }; + + i2c@1ff4a000 { + compatible = "loongson,ls2k-i2c"; + reg = <0x0 0x1ff4a000 0x0 0x0800>; + interrupt-parent = <&eiointc>; + interrupts = <18>; + status = "disabled"; + }; + + i2c@1ff4a800 { + compatible = "loongson,ls2k-i2c"; + reg = <0x0 0x1ff4a800 0x0 0x0800>; + interrupt-parent = <&eiointc>; + interrupts = <19>; + status = "disabled"; + }; + + pmc: power-management@1ff6c000 { + compatible = "loongson,ls2k0500-pmc", "syscon"; + reg = <0x0 0x1ff6c000 0x0 0x58>; + interrupt-parent = <&eiointc>; + interrupts = <56>; + loongson,suspend-address = <0x0 0x1c000500>; + + syscon-reboot { + compatible = "syscon-reboot"; + offset = <0x30>; + mask = <0x1>; + }; + + syscon-poweroff { + compatible = "syscon-poweroff"; + regmap = <&pmc>; + offset = <0x14>; + mask = <0x3c00>; + value = <0x3c00>; + }; + }; + + rtc0: rtc@1ff6c100 { + compatible = "loongson,ls2k0500-rtc", "loongson,ls7a-rtc"; + reg = <0x0 0x1ff6c100 0x0 0x100>; + interrupt-parent = <&eiointc>; + interrupts = <35>; + status = "disabled"; + }; + + pcie@1a000000 { + compatible = "loongson,ls2k-pci"; + reg = <0x0 0x1a000000 0x0 0x02000000>, + <0xfe 0x0 0x0 0x20000000>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; + bus-range = <0x0 0x5>; + ranges = <0x01000000 0x0 0x00004000 0x0 0x16404000 0x0 0x00004000>, + <0x02000000 0x0 0x40000000 0x0 
0x40000000 0x0 0x40000000>; + + pcie@0,0 { + reg = <0x0000 0x0 0x0 0x0 0x0>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; + interrupt-parent = <&eiointc>; + #interrupt-cells = <1>; + interrupt-map-mask = <0x0 0x0 0x0 0x0>; + interrupt-map = <0x0 0x0 0x0 0x0 &eiointc 81>; + ranges; + }; + + pcie@1,0 { + reg = <0x0800 0x0 0x0 0x0 0x0>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; + interrupt-parent = <&eiointc>; + #interrupt-cells = <1>; + interrupt-map-mask = <0x0 0x0 0x0 0x0>; + interrupt-map = <0x0 0x0 0x0 0x0 &eiointc 82>; + ranges; + }; + }; + }; +}; diff --git a/arch/loongarch/boot/dts/loongson-2k1000-ref.dts b/arch/loongarch/boot/dts/loongson-2k1000-ref.dts new file mode 100644 index 0000000000000000000000000000000000000000..132a2d1ea8bce1ac95222875b6ad74d5ebf06b14 --- /dev/null +++ b/arch/loongarch/boot/dts/loongson-2k1000-ref.dts @@ -0,0 +1,183 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023 Loongson Technology Corporation Limited + */ + +/dts-v1/; + +#include "loongson-2k1000.dtsi" + +/ { + compatible = "loongson,ls2k1000-ref", "loongson,ls2k1000"; + model = "Loongson-2K1000 Reference Board"; + + aliases { + serial0 = &uart0; + }; + + chosen { + stdout-path = "serial0:115200n8"; + }; + + memory@200000 { + device_type = "memory"; + reg = <0x0 0x00200000 0x0 0x06e00000>, + <0x0 0x08000000 0x0 0x07000000>, + <0x0 0x90000000 0x1 0xe0000000>; + }; + + reserved-memory { + #address-cells = <2>; + #size-cells = <2>; + ranges; + + linux,cma { + compatible = "shared-dma-pool"; + reusable; + size = <0x0 0x2000000>; + linux,cma-default; + }; + }; +}; + +&gmac0 { + status = "okay"; + + phy-mode = "rgmii"; + phy-handle = <&phy0>; + mdio { + compatible = "snps,dwmac-mdio"; + #address-cells = <1>; + #size-cells = <0>; + phy0: ethernet-phy@0 { + reg = <0>; + }; + }; +}; + +&gmac1 { + status = "okay"; + + phy-mode = "rgmii"; + phy-handle = <&phy1>; + mdio { + compatible = "snps,dwmac-mdio"; + #address-cells = 
<1>; + #size-cells = <0>; + phy1: ethernet-phy@1 { + reg = <16>; + }; + }; +}; + +&i2c2 { + status = "okay"; + + pinctrl-0 = <&i2c0_pins_default>; + pinctrl-names = "default"; + + #address-cells = <1>; + #size-cells = <0>; + eeprom@57{ + compatible = "atmel,24c16"; + reg = <0x57>; + pagesize = <16>; + }; +}; + +&spi0 { + status = "okay"; + + #address-cells = <1>; + #size-cells = <0>; + spidev@0 { + compatible = "rohm,dh2228fv"; + spi-max-frequency = <100000000>; + reg = <0>; + }; +}; + +&ehci0 { + status = "okay"; +}; + +&ohci0 { + status = "okay"; +}; + +&sata { + status = "okay"; +}; + +&uart0 { + status = "okay"; +}; + +&clk { + status = "okay"; +}; + +&rtc0 { + status = "okay"; +}; + +&pctrl { + status = "okay"; + + sdio_pins_default: sdio-pins { + sdio-pinmux { + groups = "sdio"; + function = "sdio"; + }; + sdio-det-pinmux { + groups = "pwm2"; + function = "gpio"; + }; + }; + + pwm1_pins_default: pwm1-pins { + pinmux { + groups = "pwm1"; + function = "pwm1"; + }; + }; + + pwm0_pins_default: pwm0-pins { + pinmux { + groups = "pwm0"; + function = "pwm0"; + }; + }; + + i2c1_pins_default: i2c1-pins { + pinmux { + groups = "i2c1"; + function = "i2c1"; + }; + }; + + i2c0_pins_default: i2c0-pins { + pinmux { + groups = "i2c0"; + function = "i2c0"; + }; + }; + + nand_pins_default: nand-pins { + pinmux { + groups = "nand"; + function = "nand"; + }; + }; + + hda_pins_default: hda-pins { + grp0-pinmux { + groups = "hda"; + function = "hda"; + }; + grp1-pinmux { + groups = "i2s"; + function = "gpio"; + }; + }; +}; diff --git a/arch/loongarch/boot/dts/loongson-2k1000.dtsi b/arch/loongarch/boot/dts/loongson-2k1000.dtsi new file mode 100644 index 0000000000000000000000000000000000000000..49a70f8c3cab22b758dd290a9fab2374a62abae9 --- /dev/null +++ b/arch/loongarch/boot/dts/loongson-2k1000.dtsi @@ -0,0 +1,492 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023 Loongson Technology Corporation Limited + */ + +/dts-v1/; + +#include +#include +#include + +/ { + 
#address-cells = <2>; + #size-cells = <2>; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu0: cpu@0 { + compatible = "loongson,la264"; + device_type = "cpu"; + reg= <0x0>; + clocks = <&clk LOONGSON2_NODE_CLK>; + }; + + cpu1: cpu@1 { + compatible = "loongson,la264"; + device_type = "cpu"; + reg = <0x1>; + clocks = <&clk LOONGSON2_NODE_CLK>; + }; + }; + + ref_100m: clock-ref-100m { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <100000000>; + clock-output-names = "ref_100m"; + }; + + cpuintc: interrupt-controller { + compatible = "loongson,cpu-interrupt-controller"; + #interrupt-cells = <1>; + interrupt-controller; + }; + + /* i2c of the dvi eeprom edid */ + i2c-gpio-0 { + compatible = "i2c-gpio"; + scl-gpios = <&gpio0 0 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>; + sda-gpios = <&gpio0 1 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>; + i2c-gpio,delay-us = <5>; /* ~100 kHz */ + #address-cells = <1>; + #size-cells = <0>; + status = "disabled"; + }; + + /* i2c of the eeprom edid */ + i2c-gpio-1 { + compatible = "i2c-gpio"; + scl-gpios = <&gpio0 33 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>; + sda-gpios = <&gpio0 32 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>; + i2c-gpio,delay-us = <5>; /* ~100 kHz */ + #address-cells = <1>; + #size-cells = <0>; + status = "disabled"; + }; + + thermal-zones { + cpu-thermal { + polling-delay-passive = <1000>; + polling-delay = <5000>; + thermal-sensors = <&tsensor 0>; + + trips { + cpu_alert: cpu-alert { + temperature = <33000>; + hysteresis = <2000>; + type = "active"; + }; + + cpu_crit: cpu-crit { + temperature = <85000>; + hysteresis = <5000>; + type = "critical"; + }; + }; + }; + }; + + bus@10000000 { + compatible = "simple-bus"; + ranges = <0x0 0x10000000 0x0 0x10000000 0x0 0x10000000>, + <0x0 0x02000000 0x0 0x02000000 0x0 0x02000000>, + <0x0 0x20000000 0x0 0x20000000 0x0 0x10000000>, + <0x0 0x40000000 0x0 0x40000000 0x0 0x40000000>, + <0xfe 0x0 0xfe 0x0 0x0 0x40000000>; + #address-cells = <2>; + #size-cells = <2>; + 
dma-coherent; + + liointc0: interrupt-controller@1fe01400 { + compatible = "loongson,liointc-2.0"; + reg = <0x0 0x1fe01400 0x0 0x40>, + <0x0 0x1fe01040 0x0 0x8>, + <0x0 0x1fe01140 0x0 0x8>; + reg-names = "main", "isr0", "isr1"; + interrupt-controller; + #interrupt-cells = <2>; + interrupt-parent = <&cpuintc>; + interrupts = <2>; + interrupt-names = "int0"; + loongson,parent_int_map = <0xffffffff>, /* int0 */ + <0x00000000>, /* int1 */ + <0x00000000>, /* int2 */ + <0x00000000>; /* int3 */ + }; + + liointc1: interrupt-controller@1fe01440 { + compatible = "loongson,liointc-2.0"; + reg = <0x0 0x1fe01440 0x0 0x40>, + <0x0 0x1fe01048 0x0 0x8>, + <0x0 0x1fe01148 0x0 0x8>; + reg-names = "main", "isr0", "isr1"; + interrupt-controller; + #interrupt-cells = <2>; + interrupt-parent = <&cpuintc>; + interrupts = <3>; + interrupt-names = "int1"; + loongson,parent_int_map = <0x00000000>, /* int0 */ + <0xffffffff>, /* int1 */ + <0x00000000>, /* int2 */ + <0x00000000>; /* int3 */ + }; + + chipid@1fe00000 { + compatible = "loongson,ls2k-chipid"; + reg = <0x0 0x1fe00000 0x0 0x30>; + little-endian; + }; + + pctrl: pinctrl@1fe00420 { + compatible = "loongson,ls2k-pinctrl"; + reg = <0x0 0x1fe00420 0x0 0x18>; + status = "disabled"; + }; + + clk: clock-controller@1fe00480 { + compatible = "loongson,ls2k-clk"; + reg = <0x0 0x1fe00480 0x0 0x58>; + #clock-cells = <1>; + clocks = <&ref_100m>; + clock-names = "ref_100m"; + status = "disabled"; + }; + + gpio0: gpio@1fe00500 { + compatible = "loongson,ls2k-gpio"; + reg = <0x0 0x1fe00500 0x0 0x38>; + ngpios = <64>; + #gpio-cells = <2>; + gpio-controller; + gpio-ranges = <&pctrl 0x0 0x0 15>, + <&pctrl 16 16 15>, + <&pctrl 32 32 10>, + <&pctrl 44 44 20>; + interrupt-parent = <&liointc1>; + interrupts = <28 IRQ_TYPE_LEVEL_HIGH>, + <29 IRQ_TYPE_LEVEL_HIGH>, + <30 IRQ_TYPE_LEVEL_HIGH>, + <30 IRQ_TYPE_LEVEL_HIGH>, + <26 IRQ_TYPE_LEVEL_HIGH>, + <26 IRQ_TYPE_LEVEL_HIGH>, + <26 IRQ_TYPE_LEVEL_HIGH>, + <26 IRQ_TYPE_LEVEL_HIGH>, + <26 IRQ_TYPE_LEVEL_HIGH>, + 
<26 IRQ_TYPE_LEVEL_HIGH>, + <26 IRQ_TYPE_LEVEL_HIGH>, + <26 IRQ_TYPE_LEVEL_HIGH>, + <26 IRQ_TYPE_LEVEL_HIGH>, + <26 IRQ_TYPE_LEVEL_HIGH>, + <26 IRQ_TYPE_LEVEL_HIGH>, + <>, + <26 IRQ_TYPE_LEVEL_HIGH>, + <26 IRQ_TYPE_LEVEL_HIGH>, + <26 IRQ_TYPE_LEVEL_HIGH>, + <26 IRQ_TYPE_LEVEL_HIGH>, + <26 IRQ_TYPE_LEVEL_HIGH>, + <26 IRQ_TYPE_LEVEL_HIGH>, + <26 IRQ_TYPE_LEVEL_HIGH>, + <26 IRQ_TYPE_LEVEL_HIGH>, + <26 IRQ_TYPE_LEVEL_HIGH>, + <26 IRQ_TYPE_LEVEL_HIGH>, + <26 IRQ_TYPE_LEVEL_HIGH>, + <26 IRQ_TYPE_LEVEL_HIGH>, + <26 IRQ_TYPE_LEVEL_HIGH>, + <26 IRQ_TYPE_LEVEL_HIGH>, + <26 IRQ_TYPE_LEVEL_HIGH>, + <26 IRQ_TYPE_LEVEL_HIGH>, + <27 IRQ_TYPE_LEVEL_HIGH>, + <27 IRQ_TYPE_LEVEL_HIGH>, + <27 IRQ_TYPE_LEVEL_HIGH>, + <27 IRQ_TYPE_LEVEL_HIGH>, + <27 IRQ_TYPE_LEVEL_HIGH>, + <>, + <27 IRQ_TYPE_LEVEL_HIGH>, + <27 IRQ_TYPE_LEVEL_HIGH>, + <27 IRQ_TYPE_LEVEL_HIGH>, + <27 IRQ_TYPE_LEVEL_HIGH>, + <>, + <>, + <27 IRQ_TYPE_LEVEL_HIGH>, + <27 IRQ_TYPE_LEVEL_HIGH>, + <27 IRQ_TYPE_LEVEL_HIGH>, + <27 IRQ_TYPE_LEVEL_HIGH>, + <27 IRQ_TYPE_LEVEL_HIGH>, + <27 IRQ_TYPE_LEVEL_HIGH>, + <27 IRQ_TYPE_LEVEL_HIGH>, + <27 IRQ_TYPE_LEVEL_HIGH>, + <27 IRQ_TYPE_LEVEL_HIGH>, + <27 IRQ_TYPE_LEVEL_HIGH>, + <27 IRQ_TYPE_LEVEL_HIGH>, + <27 IRQ_TYPE_LEVEL_HIGH>, + <27 IRQ_TYPE_LEVEL_HIGH>, + <27 IRQ_TYPE_LEVEL_HIGH>, + <27 IRQ_TYPE_LEVEL_HIGH>, + <27 IRQ_TYPE_LEVEL_HIGH>, + <27 IRQ_TYPE_LEVEL_HIGH>, + <27 IRQ_TYPE_LEVEL_HIGH>, + <27 IRQ_TYPE_LEVEL_HIGH>, + <27 IRQ_TYPE_LEVEL_HIGH>; + }; + + tsensor: thermal-sensor@1fe01500 { + compatible = "loongson,ls2k1000-thermal"; + reg = <0x0 0x1fe01500 0x0 0x30>; + interrupt-parent = <&liointc0>; + interrupts = <7 IRQ_TYPE_LEVEL_HIGH>; + #thermal-sensor-cells = <1>; + }; + + dma-controller@1fe00c00 { + compatible = "loongson,ls2k1000-apbdma"; + reg = <0x0 0x1fe00c00 0x0 0x8>; + interrupt-parent = <&liointc1>; + interrupts = <12 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk LOONGSON2_APB_CLK>; + #dma-cells = <1>; + status = "disabled"; + }; + + dma-controller@1fe00c10 { + compatible = 
"loongson,ls2k1000-apbdma"; + reg = <0x0 0x1fe00c10 0x0 0x8>; + interrupt-parent = <&liointc1>; + interrupts = <13 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk LOONGSON2_APB_CLK>; + #dma-cells = <1>; + status = "disabled"; + }; + + dma-controller@1fe00c20 { + compatible = "loongson,ls2k1000-apbdma"; + reg = <0x0 0x1fe00c20 0x0 0x8>; + interrupt-parent = <&liointc1>; + interrupts = <14 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk LOONGSON2_APB_CLK>; + #dma-cells = <1>; + status = "disabled"; + }; + + dma-controller@1fe00c30 { + compatible = "loongson,ls2k1000-apbdma"; + reg = <0x0 0x1fe00c30 0x0 0x8>; + interrupt-parent = <&liointc1>; + interrupts = <15 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk LOONGSON2_APB_CLK>; + #dma-cells = <1>; + status = "disabled"; + }; + + dma-controller@1fe00c40 { + compatible = "loongson,ls2k1000-apbdma"; + reg = <0x0 0x1fe00c40 0x0 0x8>; + interrupt-parent = <&liointc1>; + interrupts = <16 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk LOONGSON2_APB_CLK>; + #dma-cells = <1>; + status = "disabled"; + }; + + uart0: serial@1fe20000 { + compatible = "ns16550a"; + reg = <0x0 0x1fe20000 0x0 0x10>; + clock-frequency = <125000000>; + interrupt-parent = <&liointc0>; + interrupts = <0x0 IRQ_TYPE_LEVEL_HIGH>; + no-loopback-test; + status = "disabled"; + }; + + i2c2: i2c@1fe21000 { + compatible = "loongson,ls2k-i2c"; + reg = <0x0 0x1fe21000 0x0 0x8>; + interrupt-parent = <&liointc0>; + interrupts = <22 IRQ_TYPE_LEVEL_HIGH>; + status = "disabled"; + }; + + i2c3: i2c@1fe21800 { + compatible = "loongson,ls2k-i2c"; + reg = <0x0 0x1fe21800 0x0 0x8>; + interrupt-parent = <&liointc0>; + interrupts = <23 IRQ_TYPE_LEVEL_HIGH>; + status = "disabled"; + }; + + pmc: power-management@1fe27000 { + compatible = "loongson,ls2k1000-pmc", "loongson,ls2k0500-pmc", "syscon"; + reg = <0x0 0x1fe27000 0x0 0x58>; + interrupt-parent = <&liointc1>; + interrupts = <11 IRQ_TYPE_LEVEL_HIGH>; + loongson,suspend-address = <0x0 0x1c000500>; + + syscon-reboot { + compatible = "syscon-reboot"; + offset = 
<0x30>; + mask = <0x1>; + }; + + syscon-poweroff { + compatible = "syscon-poweroff"; + regmap = <&pmc>; + offset = <0x14>; + mask = <0x3c00>; + value = <0x3c00>; + }; + }; + + rtc0: rtc@1fe27800 { + compatible = "loongson,ls2k1000-rtc"; + reg = <0x0 0x1fe27800 0x0 0x100>; + interrupt-parent = <&liointc1>; + interrupts = <8 IRQ_TYPE_LEVEL_HIGH>; + status = "disabled"; + }; + + spi0: spi@1fff0220 { + compatible = "loongson,ls2k1000-spi"; + reg = <0x0 0x1fff0220 0x0 0x10>; + clocks = <&clk LOONGSON2_BOOT_CLK>; + status = "disabled"; + }; + + pcie@1a000000 { + compatible = "loongson,ls2k-pci"; + reg = <0x0 0x1a000000 0x0 0x02000000>, + <0xfe 0x0 0x0 0x20000000>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; + bus-range = <0x0 0xff>; + ranges = <0x01000000 0x0 0x00008000 0x0 0x18008000 0x0 0x00008000>, + <0x02000000 0x0 0x60000000 0x0 0x60000000 0x0 0x20000000>; + + gmac0: ethernet@3,0 { + reg = <0x1800 0x0 0x0 0x0 0x0>; + interrupt-parent = <&liointc0>; + interrupts = <12 IRQ_TYPE_LEVEL_HIGH>, + <13 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "macirq", "eth_lpi"; + status = "disabled"; + }; + + gmac1: ethernet@3,1 { + reg = <0x1900 0x0 0x0 0x0 0x0>; + interrupt-parent = <&liointc0>; + interrupts = <14 IRQ_TYPE_LEVEL_HIGH>, + <15 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "macirq", "eth_lpi"; + status = "disabled"; + }; + + ehci0: usb@4,1 { + reg = <0x2100 0x0 0x0 0x0 0x0>; + interrupt-parent = <&liointc1>; + interrupts = <18 IRQ_TYPE_LEVEL_HIGH>; + status = "disabled"; + }; + + ohci0: usb@4,2 { + reg = <0x2200 0x0 0x0 0x0 0x0>; + interrupt-parent = <&liointc1>; + interrupts = <19 IRQ_TYPE_LEVEL_HIGH>; + status = "disabled"; + }; + + display@6,0 { + reg = <0x3000 0x0 0x0 0x0 0x0>; + interrupt-parent = <&liointc0>; + interrupts = <28 IRQ_TYPE_LEVEL_HIGH>; + status = "disabled"; + }; + + hda@7,0 { + reg = <0x3800 0x0 0x0 0x0 0x0>; + interrupt-parent = <&liointc0>; + interrupts = <4 IRQ_TYPE_LEVEL_HIGH>; + status = "disabled"; + }; + + sata: sata@8,0 { + 
reg = <0x4000 0x0 0x0 0x0 0x0>; + interrupt-parent = <&liointc0>; + interrupts = <19 IRQ_TYPE_LEVEL_HIGH>; + status = "disabled"; + }; + + pcie@9,0 { + reg = <0x4800 0x0 0x0 0x0 0x0>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; + #interrupt-cells = <1>; + interrupt-map-mask = <0x0 0x0 0x0 0x0>; + interrupt-map = <0x0 0x0 0x0 0x0 &liointc1 0x0 IRQ_TYPE_LEVEL_HIGH>; + ranges; + }; + + pcie@a,0 { + reg = <0x5000 0x0 0x0 0x0 0x0>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; + interrupt-parent = <&liointc1>; + #interrupt-cells = <1>; + interrupt-map-mask = <0x0 0x0 0x0 0x0>; + interrupt-map = <0x0 0x0 0x0 0x0 &liointc1 1 IRQ_TYPE_LEVEL_HIGH>; + ranges; + }; + + pcie@b,0 { + reg = <0x5800 0x0 0x0 0x0 0x0>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; + interrupt-parent = <&liointc1>; + #interrupt-cells = <1>; + interrupt-map-mask = <0x0 0x0 0x0 0x0>; + interrupt-map = <0x0 0x0 0x0 0x0 &liointc1 2 IRQ_TYPE_LEVEL_HIGH>; + ranges; + }; + + pcie@c,0 { + reg = <0x6000 0x0 0x0 0x0 0x0>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; + interrupt-parent = <&liointc1>; + #interrupt-cells = <1>; + interrupt-map-mask = <0x0 0x0 0x0 0x0>; + interrupt-map = <0x0 0x0 0x0 0x0 &liointc1 3 IRQ_TYPE_LEVEL_HIGH>; + ranges; + }; + + pcie@d,0 { + reg = <0x6800 0x0 0x0 0x0 0x0>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; + interrupt-parent = <&liointc1>; + #interrupt-cells = <1>; + interrupt-map-mask = <0x0 0x0 0x0 0x0>; + interrupt-map = <0x0 0x0 0x0 0x0 &liointc1 4 IRQ_TYPE_LEVEL_HIGH>; + ranges; + }; + + pcie@e,0 { + reg = <0x7000 0x0 0x0 0x0 0x0>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; + interrupt-parent = <&liointc1>; + #interrupt-cells = <1>; + interrupt-map-mask = <0x0 0x0 0x0 0x0>; + interrupt-map = <0x0 0x0 0x0 0x0 &liointc1 5 IRQ_TYPE_LEVEL_HIGH>; + ranges; + }; + }; + }; +}; diff --git a/arch/loongarch/boot/dts/loongson-2k2000-ref.dts 
b/arch/loongarch/boot/dts/loongson-2k2000-ref.dts new file mode 100644 index 0000000000000000000000000000000000000000..dca91caf895e3cd9e428e75b91da9392bfb49d82 --- /dev/null +++ b/arch/loongarch/boot/dts/loongson-2k2000-ref.dts @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023 Loongson Technology Corporation Limited + */ + +/dts-v1/; + +#include "loongson-2k2000.dtsi" + +/ { + compatible = "loongson,ls2k2000-ref", "loongson,ls2k2000"; + model = "Loongson-2K2000 Reference Board"; + + aliases { + serial0 = &uart0; + }; + + chosen { + stdout-path = "serial0:115200n8"; + }; + + memory@200000 { + device_type = "memory"; + reg = <0x0 0x00200000 0x0 0x0ee00000>, + <0x0 0x90000000 0x0 0x70000000>; + }; + + reserved-memory { + #address-cells = <2>; + #size-cells = <2>; + ranges; + + linux,cma { + compatible = "shared-dma-pool"; + reusable; + size = <0x0 0x2000000>; + linux,cma-default; + }; + }; +}; + +&sata { + status = "okay"; +}; + +&uart0 { + status = "okay"; +}; + +&rtc0 { + status = "okay"; +}; + +&xhci0 { + status = "okay"; +}; + +&xhci1 { + status = "okay"; +}; + +&gmac0 { + status = "okay"; +}; + +&gmac1 { + status = "okay"; +}; + +&gmac2 { + status = "okay"; +}; diff --git a/arch/loongarch/boot/dts/loongson-2k2000.dtsi b/arch/loongarch/boot/dts/loongson-2k2000.dtsi new file mode 100644 index 0000000000000000000000000000000000000000..a231949b5f553a3814f48f6875e65ac2ed73d09a --- /dev/null +++ b/arch/loongarch/boot/dts/loongson-2k2000.dtsi @@ -0,0 +1,300 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023 Loongson Technology Corporation Limited + */ + +/dts-v1/; + +#include + +/ { + #address-cells = <2>; + #size-cells = <2>; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu0: cpu@1 { + compatible = "loongson,la364"; + device_type = "cpu"; + reg = <0x0>; + clocks = <&cpu_clk>; + }; + + cpu1: cpu@2 { + compatible = "loongson,la364"; + device_type = "cpu"; + reg = <0x1>; + clocks = <&cpu_clk>; + }; + }; + + 
cpu_clk: cpu-clk { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <1400000000>; + }; + + cpuintc: interrupt-controller { + compatible = "loongson,cpu-interrupt-controller"; + #interrupt-cells = <1>; + interrupt-controller; + }; + + bus@10000000 { + compatible = "simple-bus"; + ranges = <0x0 0x10000000 0x0 0x10000000 0x0 0x10000000>, + <0x0 0x02000000 0x0 0x02000000 0x0 0x02000000>, + <0x0 0x40000000 0x0 0x40000000 0x0 0x40000000>, + <0xfe 0x0 0xfe 0x0 0x0 0x40000000>; + #address-cells = <2>; + #size-cells = <2>; + + pmc: power-management@100d0000 { + compatible = "loongson,ls2k2000-pmc", "loongson,ls2k0500-pmc", "syscon"; + reg = <0x0 0x100d0000 0x0 0x58>; + interrupt-parent = <&eiointc>; + interrupts = <47>; + loongson,suspend-address = <0x0 0x1c000500>; + + syscon-reboot { + compatible = "syscon-reboot"; + offset = <0x30>; + mask = <0x1>; + }; + + syscon-poweroff { + compatible = "syscon-poweroff"; + regmap = <&pmc>; + offset = <0x14>; + mask = <0x3c00>; + value = <0x3c00>; + }; + }; + + liointc: interrupt-controller@1fe01400 { + compatible = "loongson,liointc-1.0"; + reg = <0x0 0x1fe01400 0x0 0x64>; + + interrupt-controller; + #interrupt-cells = <2>; + interrupt-parent = <&cpuintc>; + interrupts = <2>; + interrupt-names = "int0"; + loongson,parent_int_map = <0xffffffff>, /* int0 */ + <0x00000000>, /* int1 */ + <0x00000000>, /* int2 */ + <0x00000000>; /* int3 */ + }; + + eiointc: interrupt-controller@1fe01600 { + compatible = "loongson,ls2k2000-eiointc"; + reg = <0x0 0x1fe01600 0x0 0xea00>; + interrupt-controller; + #interrupt-cells = <1>; + interrupt-parent = <&cpuintc>; + interrupts = <3>; + }; + + pic: interrupt-controller@10000000 { + compatible = "loongson,pch-pic-1.0"; + reg = <0x0 0x10000000 0x0 0x400>; + interrupt-controller; + #interrupt-cells = <2>; + loongson,pic-base-vec = <0>; + interrupt-parent = <&eiointc>; + }; + + msi: msi-controller@1fe01140 { + compatible = "loongson,pch-msi-1.0"; + reg = <0x0 0x1fe01140 0x0 0x8>; + 
msi-controller; + loongson,msi-base-vec = <64>; + loongson,msi-num-vecs = <192>; + interrupt-parent = <&eiointc>; + }; + + rtc0: rtc@100d0100 { + compatible = "loongson,ls2k2000-rtc", "loongson,ls7a-rtc"; + reg = <0x0 0x100d0100 0x0 0x100>; + interrupt-parent = <&pic>; + interrupts = <52 IRQ_TYPE_LEVEL_HIGH>; + status = "disabled"; + }; + + uart0: serial@1fe001e0 { + compatible = "ns16550a"; + reg = <0x0 0x1fe001e0 0x0 0x10>; + clock-frequency = <100000000>; + interrupt-parent = <&liointc>; + interrupts = <10 IRQ_TYPE_LEVEL_HIGH>; + no-loopback-test; + status = "disabled"; + }; + + pcie@1a000000 { + compatible = "loongson,ls2k-pci"; + reg = <0x0 0x1a000000 0x0 0x02000000>, + <0xfe 0x0 0x0 0x20000000>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; + bus-range = <0x0 0xff>; + ranges = <0x01000000 0x0 0x00008000 0x0 0x18400000 0x0 0x00008000>, + <0x02000000 0x0 0x60000000 0x0 0x60000000 0x0 0x20000000>; + + gmac0: ethernet@3,0 { + reg = <0x1800 0x0 0x0 0x0 0x0>; + interrupts = <12 IRQ_TYPE_LEVEL_HIGH>; + interrupt-parent = <&pic>; + status = "disabled"; + }; + + gmac1: ethernet@3,1 { + reg = <0x1900 0x0 0x0 0x0 0x0>; + interrupts = <14 IRQ_TYPE_LEVEL_HIGH>; + interrupt-parent = <&pic>; + status = "disabled"; + }; + + gmac2: ethernet@3,2 { + reg = <0x1a00 0x0 0x0 0x0 0x0>; + interrupts = <17 IRQ_TYPE_LEVEL_HIGH>; + interrupt-parent = <&pic>; + status = "disabled"; + }; + + xhci0: usb@4,0 { + reg = <0x2000 0x0 0x0 0x0 0x0>; + interrupts = <48 IRQ_TYPE_LEVEL_HIGH>; + interrupt-parent = <&pic>; + status = "disabled"; + }; + + xhci1: usb@19,0 { + reg = <0xc800 0x0 0x0 0x0 0x0>; + interrupts = <22 IRQ_TYPE_LEVEL_HIGH>; + interrupt-parent = <&pic>; + status = "disabled"; + }; + + display@6,1 { + reg = <0x3100 0x0 0x0 0x0 0x0>; + interrupts = <28 IRQ_TYPE_LEVEL_HIGH>; + interrupt-parent = <&pic>; + status = "disabled"; + }; + + hda@7,0 { + reg = <0x3800 0x0 0x0 0x0 0x0>; + interrupts = <58 IRQ_TYPE_LEVEL_HIGH>; + interrupt-parent = <&pic>; + status = 
"disabled"; + }; + + sata: sata@8,0 { + reg = <0x4000 0x0 0x0 0x0 0x0>; + interrupts = <16 IRQ_TYPE_LEVEL_HIGH>; + interrupt-parent = <&pic>; + status = "disabled"; + }; + + pcie@9,0 { + reg = <0x4800 0x0 0x0 0x0 0x0>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; + interrupt-parent = <&pic>; + #interrupt-cells = <1>; + interrupt-map-mask = <0x0 0x0 0x0 0x0>; + interrupt-map = <0x0 0x0 0x0 0x0 &pic 32 IRQ_TYPE_LEVEL_HIGH>; + ranges; + }; + + pcie@a,0 { + reg = <0x5000 0x0 0x0 0x0 0x0>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; + interrupt-parent = <&pic>; + #interrupt-cells = <1>; + interrupt-map-mask = <0x0 0x0 0x0 0x0>; + interrupt-map = <0x0 0x0 0x0 0x0 &pic 33 IRQ_TYPE_LEVEL_HIGH>; + ranges; + }; + + pcie@b,0 { + reg = <0x5800 0x0 0x0 0x0 0x0>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; + interrupt-parent = <&pic>; + #interrupt-cells = <1>; + interrupt-map-mask = <0x0 0x0 0x0 0x0>; + interrupt-map = <0x0 0x0 0x0 0x0 &pic 34 IRQ_TYPE_LEVEL_HIGH>; + ranges; + }; + + pcie@c,0 { + reg = <0x6000 0x0 0x0 0x0 0x0>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; + interrupt-parent = <&pic>; + #interrupt-cells = <1>; + interrupt-map-mask = <0x0 0x0 0x0 0x0>; + interrupt-map = <0x0 0x0 0x0 0x0 &pic 35 IRQ_TYPE_LEVEL_HIGH>; + ranges; + }; + + pcie@d,0 { + reg = <0x6800 0x0 0x0 0x0 0x0>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; + interrupt-parent = <&pic>; + #interrupt-cells = <1>; + interrupt-map-mask = <0x0 0x0 0x0 0x0>; + interrupt-map = <0x0 0x0 0x0 0x0 &pic 36 IRQ_TYPE_LEVEL_HIGH>; + ranges; + }; + + pcie@e,0 { + reg = <0x7000 0x0 0x0 0x0 0x0>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; + interrupt-parent = <&pic>; + #interrupt-cells = <1>; + interrupt-map-mask = <0x0 0x0 0x0 0x0>; + interrupt-map = <0x0 0x0 0x0 0x0 &pic 37 IRQ_TYPE_LEVEL_HIGH>; + ranges; + }; + + pcie@f,0 { + reg = <0x7800 0x0 0x0 0x0 0x0>; + #address-cells 
= <3>; + #size-cells = <2>; + device_type = "pci"; + interrupt-parent = <&pic>; + #interrupt-cells = <1>; + interrupt-map-mask = <0x0 0x0 0x0 0x0>; + interrupt-map = <0x0 0x0 0x0 0x0 &pic 40 IRQ_TYPE_LEVEL_HIGH>; + ranges; + }; + + pcie@10,0 { + reg = <0x8000 0x0 0x0 0x0 0x0>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; + interrupt-parent = <&pic>; + #interrupt-cells = <1>; + interrupt-map-mask = <0x0 0x0 0x0 0x0>; + interrupt-map = <0x0 0x0 0x0 0x0 &pic 30 IRQ_TYPE_LEVEL_HIGH>; + ranges; + }; + }; + }; +}; diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index 60e331af98398149df56cfed7bc7b27abe21c526..f18c2ba871eff6c6c1b84808f0e9f7296c854db3 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -6,6 +6,8 @@ CONFIG_HIGH_RES_TIMERS=y CONFIG_BPF_SYSCALL=y CONFIG_BPF_JIT=y CONFIG_PREEMPT=y +CONFIG_PREEMPT_DYNAMIC=y +CONFIG_SCHED_CORE=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_TASKSTATS=y @@ -19,6 +21,7 @@ CONFIG_BLK_CGROUP=y CONFIG_CFS_BANDWIDTH=y CONFIG_RT_GROUP_SCHED=y CONFIG_CGROUP_PIDS=y +CONFIG_CGROUP_RDMA=y CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_HUGETLB=y CONFIG_CPUSETS=y @@ -26,6 +29,7 @@ CONFIG_CGROUP_DEVICE=y CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_PERF=y CONFIG_CGROUP_BPF=y +CONFIG_CGROUP_MISC=y CONFIG_NAMESPACES=y CONFIG_USER_NS=y CONFIG_CHECKPOINT_RESTORE=y @@ -35,6 +39,8 @@ CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y CONFIG_PERF_EVENTS=y +CONFIG_KEXEC=y +CONFIG_CRASH_DUMP=y CONFIG_LOONGARCH=y CONFIG_64BIT=y CONFIG_MACH_LOONGSON64=y @@ -44,13 +50,11 @@ CONFIG_DMI=y CONFIG_EFI=y CONFIG_SMP=y CONFIG_HOTPLUG_CPU=y -CONFIG_NR_CPUS=64 +CONFIG_NR_CPUS=256 CONFIG_NUMA=y CONFIG_CPU_HAS_FPU=y CONFIG_CPU_HAS_LSX=y CONFIG_CPU_HAS_LASX=y -CONFIG_KEXEC=y -CONFIG_CRASH_DUMP=y CONFIG_RANDOMIZE_BASE=y CONFIG_SUSPEND=y CONFIG_HIBERNATION=y @@ -62,10 +66,6 @@ CONFIG_ACPI_IPMI=m CONFIG_ACPI_HOTPLUG_CPU=y 
CONFIG_ACPI_PCI_SLOT=y CONFIG_ACPI_HOTPLUG_MEMORY=y -CONFIG_EFI_ZBOOT=y -CONFIG_EFI_GENERIC_STUB_INITRD_CMDLINE_LOADER=y -CONFIG_EFI_CAPSULE_LOADER=m -CONFIG_EFI_TEST=m CONFIG_VIRTUALIZATION=y CONFIG_KVM=m CONFIG_JUMP_LABEL=y @@ -74,10 +74,18 @@ CONFIG_MODULE_FORCE_LOAD=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODVERSIONS=y +CONFIG_BLK_DEV_ZONED=y CONFIG_BLK_DEV_THROTTLING=y +CONFIG_BLK_DEV_THROTTLING_LOW=y +CONFIG_BLK_WBT=y +CONFIG_BLK_CGROUP_IOLATENCY=y +CONFIG_BLK_CGROUP_FC_APPID=y +CONFIG_BLK_CGROUP_IOCOST=y +CONFIG_BLK_CGROUP_IOPRIO=y CONFIG_PARTITION_ADVANCED=y CONFIG_BSD_DISKLABEL=y CONFIG_UNIXWARE_DISKLABEL=y +CONFIG_CMDLINE_PARTITION=y CONFIG_IOSCHED_BFQ=y CONFIG_BFQ_GROUP_IOSCHED=y CONFIG_BINFMT_MISC=m @@ -93,6 +101,8 @@ CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y +CONFIG_CMA=y +CONFIG_CMA_SYSFS=y CONFIG_USERFAULTFD=y CONFIG_NET=y CONFIG_PACKET=y @@ -128,6 +138,7 @@ CONFIG_IPV6_ROUTER_PREF=y CONFIG_IPV6_ROUTE_INFO=y CONFIG_INET6_ESP=m CONFIG_IPV6_MROUTE=y +CONFIG_MPTCP=y CONFIG_NETWORK_PHY_TIMESTAMPING=y CONFIG_NETFILTER=y CONFIG_BRIDGE_NETFILTER=m @@ -352,6 +363,7 @@ CONFIG_PCIEAER=y CONFIG_PCI_IOV=y CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_SHPC=y +CONFIG_PCI_HOST_GENERIC=y CONFIG_PCCARD=m CONFIG_YENTA=m CONFIG_RAPIDIO=y @@ -365,6 +377,10 @@ CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_FW_LOADER_COMPRESS=y CONFIG_FW_LOADER_COMPRESS_ZSTD=y +CONFIG_EFI_ZBOOT=y +CONFIG_EFI_BOOTLOADER_CONTROL=m +CONFIG_EFI_CAPSULE_LOADER=m +CONFIG_EFI_TEST=m CONFIG_MTD=m CONFIG_MTD_BLOCK=m CONFIG_MTD_CFI=m @@ -586,6 +602,7 @@ CONFIG_RTW89_8852AE=m CONFIG_RTW89_8852CE=m CONFIG_ZD1211RW=m CONFIG_USB_NET_RNDIS_WLAN=m +CONFIG_USB4_NET=m CONFIG_INPUT_MOUSEDEV=y CONFIG_INPUT_MOUSEDEV_PSAUX=y CONFIG_INPUT_EVDEV=y @@ -691,6 +708,9 @@ CONFIG_SND_HDA_CODEC_SIGMATEL=y CONFIG_SND_HDA_CODEC_HDMI=y CONFIG_SND_HDA_CODEC_CONEXANT=y CONFIG_SND_USB_AUDIO=m +CONFIG_SND_SOC=m +CONFIG_SND_SOC_LOONGSON_CARD=m 
+CONFIG_SND_VIRTIO=m CONFIG_HIDRAW=y CONFIG_UHID=m CONFIG_HID_A4TECH=m @@ -738,6 +758,11 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_EFI=y CONFIG_RTC_DRV_LOONGSON=y CONFIG_DMADEVICES=y +CONFIG_LS2X_APB_DMA=y +CONFIG_UDMABUF=y +CONFIG_DMABUF_HEAPS=y +CONFIG_DMABUF_HEAPS_SYSTEM=y +CONFIG_DMABUF_HEAPS_CMA=y CONFIG_UIO=m CONFIG_UIO_PDRV_GENIRQ=m CONFIG_UIO_DMEM_GENIRQ=m @@ -778,7 +803,15 @@ CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND=y CONFIG_DEVFREQ_GOV_PERFORMANCE=y CONFIG_DEVFREQ_GOV_POWERSAVE=y CONFIG_DEVFREQ_GOV_USERSPACE=y +CONFIG_NTB=m +CONFIG_NTB_MSI=y +CONFIG_NTB_IDT=m +CONFIG_NTB_EPF=m +CONFIG_NTB_SWITCHTEC=m +CONFIG_NTB_PERF=m +CONFIG_NTB_TRANSPORT=m CONFIG_PWM=y +CONFIG_USB4=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y @@ -797,6 +830,10 @@ CONFIG_GFS2_FS_LOCKING_DLM=y CONFIG_OCFS2_FS=m CONFIG_BTRFS_FS=y CONFIG_BTRFS_FS_POSIX_ACL=y +CONFIG_F2FS_FS=m +CONFIG_F2FS_FS_SECURITY=y +CONFIG_F2FS_CHECK_FS=y +CONFIG_F2FS_FS_COMPRESSION=y CONFIG_FANOTIFY=y CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y CONFIG_QUOTA=y @@ -883,7 +920,6 @@ CONFIG_KEY_DH_OPERATIONS=y CONFIG_SECURITY=y CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y -CONFIG_SECURITY_SELINUX_DISABLE=y CONFIG_SECURITY_APPARMOR=y CONFIG_SECURITY_YAMA=y CONFIG_DEFAULT_SECURITY_DAC=y @@ -914,6 +950,9 @@ CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_CRYPTO_CRC32_LOONGARCH=m CONFIG_CRYPTO_DEV_VIRTIO=m +CONFIG_DMA_CMA=y +CONFIG_DMA_NUMA_CMA=y +CONFIG_CMA_SIZE_MBYTES=0 CONFIG_PRINTK_TIME=y CONFIG_STRIP_ASM_SYMS=y CONFIG_MAGIC_SYSRQ=y diff --git a/arch/loongarch/include/asm/bootinfo.h b/arch/loongarch/include/asm/bootinfo.h index c60796869b2b80377d9d6afca9c8705f8d2433e1..6d5846dd075cbdde654760422fac5d9536605d4a 100644 --- a/arch/loongarch/include/asm/bootinfo.h +++ b/arch/loongarch/include/asm/bootinfo.h @@ -24,13 +24,15 @@ struct loongson_board_info { const char *board_vendor; }; +#define NR_WORDS DIV_ROUND_UP(NR_CPUS, BITS_PER_LONG) + struct loongson_system_configuration { int 
nr_cpus; int nr_nodes; int boot_cpu_id; int cores_per_node; int cores_per_package; - unsigned long cores_io_master; + unsigned long cores_io_master[NR_WORDS]; unsigned long suspend_addr; const char *cpuname; }; @@ -42,7 +44,7 @@ extern struct loongson_system_configuration loongson_sysconf; static inline bool io_master(int cpu) { - return test_bit(cpu, &loongson_sysconf.cores_io_master); + return test_bit(cpu, loongson_sysconf.cores_io_master); } #endif /* _ASM_BOOTINFO_H */ diff --git a/arch/loongarch/include/asm/crash_core.h b/arch/loongarch/include/asm/crash_core.h new file mode 100644 index 0000000000000000000000000000000000000000..218bdbfa527ba861364cbbb6601369036ee293bf --- /dev/null +++ b/arch/loongarch/include/asm/crash_core.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _LOONGARCH_CRASH_CORE_H +#define _LOONGARCH_CRASH_CORE_H + +#define CRASH_ALIGN SZ_2M + +#define CRASH_ADDR_LOW_MAX SZ_4G +#define CRASH_ADDR_HIGH_MAX memblock_end_of_DRAM() + +extern phys_addr_t memblock_end_of_DRAM(void); + +#endif diff --git a/arch/loongarch/include/asm/elf.h b/arch/loongarch/include/asm/elf.h index 9b16a3b8e70608c8765f838cfd21925d4fe51145..f16bd42456e4ccf3ad6c8917165176b8ef5d8f05 100644 --- a/arch/loongarch/include/asm/elf.h +++ b/arch/loongarch/include/asm/elf.h @@ -241,8 +241,6 @@ void loongarch_dump_regs64(u64 *uregs, const struct pt_regs *regs); do { \ current->thread.vdso = &vdso_info; \ \ - loongarch_set_personality_fcsr(state); \ - \ if (personality(current->personality) != PER_LINUX) \ set_personality(PER_LINUX); \ } while (0) @@ -259,7 +257,6 @@ do { \ clear_thread_flag(TIF_32BIT_ADDR); \ \ current->thread.vdso = &vdso_info; \ - loongarch_set_personality_fcsr(state); \ \ p = personality(current->personality); \ if (p != PER_LINUX32 && p != PER_LINUX) \ @@ -340,6 +337,4 @@ extern int arch_elf_pt_proc(void *ehdr, void *phdr, struct file *elf, extern int arch_check_elf(void *ehdr, bool has_interpreter, void *interp_ehdr, struct 
arch_elf_state *state); -extern void loongarch_set_personality_fcsr(struct arch_elf_state *state); - #endif /* _ASM_ELF_H */ diff --git a/arch/loongarch/include/asm/ftrace.h b/arch/loongarch/include/asm/ftrace.h index a11996eb5892dd169a1e5a0ba9ad20fb854f4be8..de891c2c83d4a980284cc5376dbc0934b7233a13 100644 --- a/arch/loongarch/include/asm/ftrace.h +++ b/arch/loongarch/include/asm/ftrace.h @@ -63,7 +63,7 @@ ftrace_regs_get_instruction_pointer(struct ftrace_regs *fregs) static __always_inline void ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs, unsigned long ip) { - regs_set_return_value(&fregs->regs, ip); + instruction_pointer_set(&fregs->regs, ip); } #define ftrace_regs_get_argument(fregs, n) \ diff --git a/arch/loongarch/include/asm/shmparam.h b/arch/loongarch/include/asm/shmparam.h deleted file mode 100644 index c9554f48d2dfab400add2d2c4e30ff6e44f1db14..0000000000000000000000000000000000000000 --- a/arch/loongarch/include/asm/shmparam.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2020-2022 Loongson Technology Corporation Limited - */ -#ifndef _ASM_SHMPARAM_H -#define _ASM_SHMPARAM_H - -#define __ARCH_FORCE_SHMLBA 1 - -#define SHMLBA SZ_64K /* attach addr a multiple of this */ - -#endif /* _ASM_SHMPARAM_H */ diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c index 8e00a754e548943ae4dba5d330a1354426c713d1..b6b097bbf8668a400105dd408c696e4135c824d8 100644 --- a/arch/loongarch/kernel/acpi.c +++ b/arch/loongarch/kernel/acpi.c @@ -119,7 +119,7 @@ acpi_parse_eio_master(union acpi_subtable_headers *header, const unsigned long e return -EINVAL; core = eiointc->node * CORES_PER_EIO_NODE; - set_bit(core, &(loongson_sysconf.cores_io_master)); + set_bit(core, loongson_sysconf.cores_io_master); return 0; } diff --git a/arch/loongarch/kernel/efi.c b/arch/loongarch/kernel/efi.c index acb5d3385675c974d98a71b8b659ed1088914acd..000825406c1f62cdebd32e79714738987d20d5cc 100644 --- 
a/arch/loongarch/kernel/efi.c +++ b/arch/loongarch/kernel/efi.c @@ -140,4 +140,6 @@ void __init efi_init(void) early_memunmap(tbl, sizeof(*tbl)); } + + efi_esrt_init(); } diff --git a/arch/loongarch/kernel/elf.c b/arch/loongarch/kernel/elf.c index 183e94fc9c69ce8761f3d70b730558bd7f26ff55..0fa81ced28dcdd053cf79f0aaffa8b127df482e1 100644 --- a/arch/loongarch/kernel/elf.c +++ b/arch/loongarch/kernel/elf.c @@ -23,8 +23,3 @@ int arch_check_elf(void *_ehdr, bool has_interpreter, void *_interp_ehdr, { return 0; } - -void loongarch_set_personality_fcsr(struct arch_elf_state *state) -{ - current->thread.fpu.fcsr = boot_cpu_data.fpu_csr0; -} diff --git a/arch/loongarch/kernel/env.c b/arch/loongarch/kernel/env.c index 6b3bfb0092e60b34946490415ff7cd2a51287886..2f1f5b08638f818c2abb7a617b3b79250812b3b8 100644 --- a/arch/loongarch/kernel/env.c +++ b/arch/loongarch/kernel/env.c @@ -5,13 +5,16 @@ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ #include +#include #include #include #include +#include #include #include #include #include +#include u64 efi_system_table; struct loongson_system_configuration loongson_sysconf; @@ -36,7 +39,16 @@ void __init init_environ(void) static int __init init_cpu_fullname(void) { - int cpu; + struct device_node *root; + int cpu, ret; + char *model; + + /* Parsing cpuname from DTS model property */ + root = of_find_node_by_path("/"); + ret = of_property_read_string(root, "model", (const char **)&model); + of_node_put(root); + if (ret == 0) + loongson_sysconf.cpuname = strsep(&model, " "); if (loongson_sysconf.cpuname && !strncmp(loongson_sysconf.cpuname, "Loongson", 8)) { for (cpu = 0; cpu < NR_CPUS; cpu++) @@ -46,6 +58,26 @@ static int __init init_cpu_fullname(void) } arch_initcall(init_cpu_fullname); +static int __init fdt_cpu_clk_init(void) +{ + struct clk *clk; + struct device_node *np; + + np = of_get_cpu_node(0, NULL); + if (!np) + return -ENODEV; + + clk = of_clk_get(np, 0); + if (IS_ERR(clk)) + return -ENODEV; + + 
cpu_clock_freq = clk_get_rate(clk); + clk_put(clk); + + return 0; +} +late_initcall(fdt_cpu_clk_init); + static ssize_t boardinfo_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S index 0ecab4216392899cf8655d0d67228cb280c6df02..c4f7de2e28054ceb6458c964c26e86596cea8602 100644 --- a/arch/loongarch/kernel/head.S +++ b/arch/loongarch/kernel/head.S @@ -74,6 +74,11 @@ SYM_CODE_START(kernel_entry) # kernel entry point la.pcrel t0, fw_arg2 st.d a2, t0, 0 +#ifdef CONFIG_PAGE_SIZE_4KB + li.d t0, 0 + li.d t1, CSR_STFILL + csrxchg t0, t1, LOONGARCH_CSR_IMPCTL1 +#endif /* KSave3 used for percpu base, initialized as 0 */ csrwr zero, PERCPU_BASE_KS /* GPR21 used for percpu base (runtime), initialized as 0 */ @@ -126,6 +131,11 @@ SYM_CODE_START(smpboot_entry) JUMP_VIRT_ADDR t0, t1 +#ifdef CONFIG_PAGE_SIZE_4KB + li.d t0, 0 + li.d t1, CSR_STFILL + csrxchg t0, t1, LOONGARCH_CSR_IMPCTL1 +#endif /* Enable PG */ li.w t0, 0xb0 # PLV=0, IE=0, PG=1 csrwr t0, LOONGARCH_CSR_CRMD diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c index 767d94cce0de07d74892733b339a55dd5e6ded0e..f2ff8b5d591e4fd638109d2c98d75543c01a112c 100644 --- a/arch/loongarch/kernel/process.c +++ b/arch/loongarch/kernel/process.c @@ -85,6 +85,7 @@ void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp) regs->csr_euen = euen; lose_fpu(0); lose_lbt(0); + current->thread.fpu.fcsr = boot_cpu_data.fpu_csr0; clear_thread_flag(TIF_LSX_CTX_LIVE); clear_thread_flag(TIF_LASX_CTX_LIVE); diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index d183a745fb85d4efcef51bbdab6d09a1e047b966..edf2bba80130670364e144ad301868a7dfd3bf93 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -252,38 +252,23 @@ static void __init arch_reserve_vmcore(void) #endif } -/* 2MB alignment for crash kernel regions */ -#define CRASH_ALIGN SZ_2M -#define CRASH_ADDR_MAX 
SZ_4G - -static void __init arch_parse_crashkernel(void) +static void __init arch_reserve_crashkernel(void) { -#ifdef CONFIG_KEXEC int ret; - unsigned long long total_mem; + unsigned long long low_size = 0; unsigned long long crash_base, crash_size; + char *cmdline = boot_command_line; + bool high = false; - total_mem = memblock_phys_mem_size(); - ret = parse_crashkernel(boot_command_line, total_mem, - &crash_size, &crash_base, - NULL, NULL); - if (ret < 0 || crash_size <= 0) + if (!IS_ENABLED(CONFIG_KEXEC_CORE)) return; - if (crash_base <= 0) { - crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN, CRASH_ALIGN, CRASH_ADDR_MAX); - if (!crash_base) { - pr_warn("crashkernel reservation failed - No suitable area found.\n"); - return; - } - } else if (!memblock_phys_alloc_range(crash_size, CRASH_ALIGN, crash_base, crash_base + crash_size)) { - pr_warn("Invalid memory region reserved for crash kernel\n"); + ret = parse_crashkernel(cmdline, memblock_phys_mem_size(), + &crash_size, &crash_base, &low_size, &high); + if (ret) return; - } - crashk_res.start = crash_base; - crashk_res.end = crash_base + crash_size - 1; -#endif + reserve_crashkernel_generic(cmdline, crash_size, crash_base, low_size, high); } static void __init fdt_setup(void) @@ -295,8 +280,12 @@ static void __init fdt_setup(void) if (acpi_os_get_root_pointer()) return; - /* Look for a device tree configuration table entry */ - fdt_pointer = efi_fdt_pointer(); + /* Prefer to use built-in dtb, checking its legality first. 
*/ + if (!fdt_check_header(__dtb_start)) + fdt_pointer = __dtb_start; + else + fdt_pointer = efi_fdt_pointer(); /* Fallback to firmware dtb */ + if (!fdt_pointer || fdt_check_header(fdt_pointer)) return; @@ -330,7 +319,9 @@ static void __init bootcmdline_init(char **cmdline_p) if (boot_command_line[0]) strlcat(boot_command_line, " ", COMMAND_LINE_SIZE); - strlcat(boot_command_line, init_command_line, COMMAND_LINE_SIZE); + if (!strstr(boot_command_line, init_command_line)) + strlcat(boot_command_line, init_command_line, COMMAND_LINE_SIZE); + goto out; } #endif @@ -357,7 +348,7 @@ out: void __init platform_init(void) { arch_reserve_vmcore(); - arch_parse_crashkernel(); + arch_reserve_crashkernel(); #ifdef CONFIG_ACPI_TABLE_UPGRADE acpi_table_upgrade(); @@ -467,15 +458,6 @@ static void __init resource_init(void) request_resource(res, &data_resource); request_resource(res, &bss_resource); } - -#ifdef CONFIG_KEXEC - if (crashk_res.start < crashk_res.end) { - insert_resource(&iomem_resource, &crashk_res); - pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n", - (unsigned long)((crashk_res.end - crashk_res.start + 1) >> 20), - (unsigned long)(crashk_res.start >> 20)); - } -#endif } static int __init add_legacy_isa_io(struct fwnode_handle *fwnode, diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index 5bca12d16e0691c8e9511f6043a7a5af9655af20..a16e3dbe9f09eb2fbf1b239b982b727330f7c233 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -208,7 +208,7 @@ static void __init fdt_smp_setup(void) } loongson_sysconf.nr_cpus = num_processors; - set_bit(0, &(loongson_sysconf.cores_io_master)); + set_bit(0, loongson_sysconf.cores_io_master); #endif } @@ -216,6 +216,9 @@ void __init loongson_smp_setup(void) { fdt_smp_setup(); + if (loongson_sysconf.cores_per_package == 0) + loongson_sysconf.cores_per_package = num_processors; + cpu_data[0].core = cpu_logical_map(0) % loongson_sysconf.cores_per_package; cpu_data[0].package = 
cpu_logical_map(0) / loongson_sysconf.cores_per_package; diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index 4fcd6cd6da234d4dc4120cb2c4b95a69e2577358..e73323d759d0b85b275aa389fc684b74d12cb13e 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -201,6 +201,11 @@ bool bpf_jit_supports_kfunc_call(void) return true; } +bool bpf_jit_supports_far_kfunc_call(void) +{ + return true; +} + /* initialized on the first pass of build_body() */ static int out_offset = -1; static int emit_bpf_tail_call(struct jit_ctx *ctx) @@ -465,7 +470,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext const u8 dst = regmap[insn->dst_reg]; const s16 off = insn->off; const s32 imm = insn->imm; - const u64 imm64 = (u64)(insn + 1)->imm << 32 | (u32)insn->imm; const bool is32 = BPF_CLASS(insn->code) == BPF_ALU || BPF_CLASS(insn->code) == BPF_JMP32; switch (code) { @@ -923,8 +927,12 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext /* dst = imm64 */ case BPF_LD | BPF_IMM | BPF_DW: + { + const u64 imm64 = (u64)(insn + 1)->imm << 32 | (u32)insn->imm; + move_imm(ctx, dst, imm64, is32); return 1; + } /* dst = *(size *)(src + off) */ case BPF_LDX | BPF_MEM | BPF_B: diff --git a/arch/m68k/include/asm/cacheflush_mm.h b/arch/m68k/include/asm/cacheflush_mm.h index ed12358c4783b468ae834106925732ef5875c772..9a71b0148461a4551fe4aae49ca9cf8fea6d46fe 100644 --- a/arch/m68k/include/asm/cacheflush_mm.h +++ b/arch/m68k/include/asm/cacheflush_mm.h @@ -191,6 +191,7 @@ extern void cache_push_v(unsigned long vaddr, int len); #define flush_cache_all() __flush_cache_all() #define flush_cache_vmap(start, end) flush_cache_all() +#define flush_cache_vmap_early(start, end) do { } while (0) #define flush_cache_vunmap(start, end) flush_cache_all() static inline void flush_cache_mm(struct mm_struct *mm) diff --git a/arch/mips/configs/ip27_defconfig b/arch/mips/configs/ip27_defconfig index 
b51f738a39a05ad9bd4c41971027774365ec30da..4714074c8bd7f557ee57e7f0354d1f411bb1e04d 100644 --- a/arch/mips/configs/ip27_defconfig +++ b/arch/mips/configs/ip27_defconfig @@ -287,7 +287,8 @@ CONFIG_BTRFS_FS_POSIX_ACL=y CONFIG_QUOTA_NETLINK_INTERFACE=y CONFIG_FUSE_FS=m CONFIG_CUSE=m -CONFIG_FSCACHE=m +CONFIG_NETFS_SUPPORT=m +CONFIG_FSCACHE=y CONFIG_FSCACHE_STATS=y CONFIG_CACHEFILES=m CONFIG_PROC_KCORE=y diff --git a/arch/mips/configs/lemote2f_defconfig b/arch/mips/configs/lemote2f_defconfig index 38f17b6584218739adbe4c8139f21be774101cf5..3389e6e885d9fa104a5342cd1d5a22fe5639c36c 100644 --- a/arch/mips/configs/lemote2f_defconfig +++ b/arch/mips/configs/lemote2f_defconfig @@ -238,7 +238,8 @@ CONFIG_BTRFS_FS=m CONFIG_QUOTA=y CONFIG_QFMT_V2=m CONFIG_AUTOFS_FS=m -CONFIG_FSCACHE=m +CONFIG_NETFS_SUPPORT=m +CONFIG_FSCACHE=y CONFIG_CACHEFILES=m CONFIG_ISO9660_FS=m CONFIG_JOLIET=y diff --git a/arch/mips/configs/loongson3_defconfig b/arch/mips/configs/loongson3_defconfig index 07839a4b397e5bcc006a68d06286a3d9d8d11875..78f4987520664b4e606e85ef3a7d78183a205aa0 100644 --- a/arch/mips/configs/loongson3_defconfig +++ b/arch/mips/configs/loongson3_defconfig @@ -356,7 +356,8 @@ CONFIG_QFMT_V2=m CONFIG_AUTOFS_FS=y CONFIG_FUSE_FS=m CONFIG_VIRTIO_FS=m -CONFIG_FSCACHE=m +CONFIG_NETFS_SUPPORT=m +CONFIG_FSCACHE=y CONFIG_ISO9660_FS=m CONFIG_JOLIET=y CONFIG_MSDOS_FS=m diff --git a/arch/mips/configs/pic32mzda_defconfig b/arch/mips/configs/pic32mzda_defconfig index 166d2ad372d142d9a919d83d073588f4f02ea80a..54774f90c23eafa397784f15d8f38b40c5a1254b 100644 --- a/arch/mips/configs/pic32mzda_defconfig +++ b/arch/mips/configs/pic32mzda_defconfig @@ -68,7 +68,8 @@ CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y CONFIG_AUTOFS_FS=m CONFIG_FUSE_FS=m -CONFIG_FSCACHE=m +CONFIG_NETFS_SUPPORT=m +CONFIG_FSCACHE=y CONFIG_ISO9660_FS=m CONFIG_JOLIET=y CONFIG_ZISOFS=y diff --git a/arch/mips/include/asm/cacheflush.h b/arch/mips/include/asm/cacheflush.h index 
f36c2519ed9768b8eb570e9b8feb0f1ad0bf8de5..1f14132b3fc98afb6c44de0b7efda4d820978278 100644 --- a/arch/mips/include/asm/cacheflush.h +++ b/arch/mips/include/asm/cacheflush.h @@ -97,6 +97,8 @@ static inline void flush_cache_vmap(unsigned long start, unsigned long end) __flush_cache_vmap(); } +#define flush_cache_vmap_early(start, end) do { } while (0) + extern void (*__flush_cache_vunmap)(void); static inline void flush_cache_vunmap(unsigned long start, unsigned long end) diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c index 3c4fc97b9f394b0c2f2d22c5f59d23c0105cbfb3..0f3cec663a12cd51498157c390f974213cb5a658 100644 --- a/arch/mips/mm/dma-noncoherent.c +++ b/arch/mips/mm/dma-noncoherent.c @@ -138,7 +138,7 @@ void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, #ifdef CONFIG_ARCH_HAS_SETUP_DMA_OPS void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - const struct iommu_ops *iommu, bool coherent) + bool coherent) { dev->dma_coherent = coherent; } diff --git a/arch/nios2/include/asm/cacheflush.h b/arch/nios2/include/asm/cacheflush.h index 348cea0977927a523022217bc3637b498a8e4185..81484a776b333a2d9c9b402461f296b3b091e219 100644 --- a/arch/nios2/include/asm/cacheflush.h +++ b/arch/nios2/include/asm/cacheflush.h @@ -38,6 +38,7 @@ void flush_icache_pages(struct vm_area_struct *vma, struct page *page, #define flush_icache_pages flush_icache_pages #define flush_cache_vmap(start, end) flush_dcache_range(start, end) +#define flush_cache_vmap_early(start, end) do { } while (0) #define flush_cache_vunmap(start, end) flush_dcache_range(start, end) extern void copy_to_user_page(struct vm_area_struct *vma, struct page *page, diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h index b4006f2a97052da67eaf3d5dcbac95197dc0fd69..ba4c05bc24d6901124deb89152c82a62aa3f5e4d 100644 --- a/arch/parisc/include/asm/cacheflush.h +++ b/arch/parisc/include/asm/cacheflush.h @@ -41,6 +41,7 @@ void 
flush_kernel_vmap_range(void *vaddr, int size); void invalidate_kernel_vmap_range(void *vaddr, int size); #define flush_cache_vmap(start, end) flush_cache_all() +#define flush_cache_vmap_early(start, end) do { } while (0) #define flush_cache_vunmap(start, end) flush_cache_all() void flush_dcache_folio(struct folio *folio); diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 414b978b8010b0cbac4511b606ee16c1a5236cc8..b9fc064d38d281f1c32584e79edd705c670b1731 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -859,6 +859,7 @@ config THREAD_SHIFT int "Thread shift" if EXPERT range 13 15 default "15" if PPC_256K_PAGES + default "15" if PPC_PSERIES || PPC_POWERNV default "14" if PPC64 default "13" help diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index b4399ad5de300d951ed58f5aa074655f16101364..08ca4772566da3162b2ed44eec747bc3a9fbbb5e 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -53,6 +53,7 @@ config RISCV select ARCH_USE_MEMTEST select ARCH_USE_QUEUED_RWLOCKS select ARCH_USES_CFI_TRAPS if CFI_CLANG + select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_GENERAL_HUGETLB if !RISCV_ISA_SVNAPOT @@ -66,9 +67,10 @@ config RISCV select CLINT_TIMER if !MMU select CLONE_BACKWARDS select COMMON_CLK - select CPU_PM if CPU_IDLE || HIBERNATION + select CPU_PM if CPU_IDLE || HIBERNATION || SUSPEND select EDAC_SUPPORT select FRAME_POINTER if PERF_EVENTS || (FUNCTION_TRACER && !DYNAMIC_FTRACE) + select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY if DYNAMIC_FTRACE select GENERIC_ARCH_TOPOLOGY select GENERIC_ATOMIC64 if !64BIT select GENERIC_CLOCKEVENTS_BROADCAST if SMP @@ -115,6 +117,7 @@ config RISCV select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_CONTIGUOUS if MMU select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && (CLANG_SUPPORTS_DYNAMIC_FTRACE || GCC_SUPPORTS_DYNAMIC_FTRACE) + select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS select 
HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL select HAVE_FUNCTION_GRAPH_TRACER @@ -143,6 +146,8 @@ config RISCV select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RETHOOK if !XIP_KERNEL select HAVE_RSEQ + select HAVE_SAMPLE_FTRACE_DIRECT + select HAVE_SAMPLE_FTRACE_DIRECT_MULTI select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU @@ -184,6 +189,20 @@ config HAVE_SHADOW_CALL_STACK # https://github.com/riscv-non-isa/riscv-elf-psabi-doc/commit/a484e843e6eeb51f0cb7b8819e50da6d2444d769 depends on $(ld-option,--no-relax-gp) +config RISCV_USE_LINKER_RELAXATION + def_bool y + # https://github.com/llvm/llvm-project/commit/6611d58f5bbcbec77262d392e2923e1d680f6985 + depends on !LD_IS_LLD || LLD_VERSION >= 150000 + +# https://github.com/llvm/llvm-project/commit/bbc0f99f3bc96f1db16f649fc21dd18e5b0918f6 +config ARCH_HAS_BROKEN_DWARF5 + def_bool y + depends on RISCV_USE_LINKER_RELAXATION + # https://github.com/llvm/llvm-project/commit/1df5ea29b43690b6622db2cad7b745607ca4de6a + depends on AS_IS_LLVM && AS_VERSION < 180000 + # https://github.com/llvm/llvm-project/commit/7ffabb61a5569444b5ac9322e22e5471cc5e4a77 + depends on LD_IS_LLD && LLD_VERSION < 180000 + config ARCH_MMAP_RND_BITS_MIN default 18 if 64BIT default 8 @@ -417,7 +436,9 @@ config NUMA depends on SMP && MMU select ARCH_SUPPORTS_NUMA_BALANCING select GENERIC_ARCH_NUMA + select HAVE_SETUP_PER_CPU_AREA select NEED_PER_CPU_EMBED_FIRST_CHUNK + select NEED_PER_CPU_PAGE_FIRST_CHUNK select OF_NUMA select USE_PERCPU_NUMA_NODE_ID help @@ -528,6 +549,28 @@ config RISCV_ISA_V_DEFAULT_ENABLE If you don't know what to do here, say Y. +config RISCV_ISA_V_UCOPY_THRESHOLD + int "Threshold size for vectorized user copies" + depends on RISCV_ISA_V + default 768 + help + Prefer using vectorized copy_to_user()/copy_from_user() when the + workload size exceeds this value. 
+ +config RISCV_ISA_V_PREEMPTIVE + bool "Run kernel-mode Vector with kernel preemption" + depends on PREEMPTION + depends on RISCV_ISA_V + default y + help + Usually, in-kernel SIMD routines are run with preemption disabled. + Functions which invoke long-running SIMD thus must yield the core's + vector unit to prevent blocking other tasks for too long. + + This config allows the kernel to run SIMD without explicitly disabling + preemption. Enabling this config will result in higher memory + consumption due to the allocation of a per-task kernel Vector context. + config TOOLCHAIN_HAS_ZBB bool default y @@ -654,6 +697,20 @@ config RISCV_MISALIGNED load/store for both kernel and userspace. When disable, misaligned accesses will generate SIGBUS in userspace and panic in kernel. +config RISCV_EFFICIENT_UNALIGNED_ACCESS + bool "Assume the CPU supports fast unaligned memory accesses" + depends on NONPORTABLE + select DCACHE_WORD_ACCESS if MMU + select HAVE_EFFICIENT_UNALIGNED_ACCESS + help + Say Y here if you want the kernel to assume that the CPU supports + efficient unaligned memory accesses. When enabled, this option + improves the performance of the kernel on such CPUs. However, the + kernel will run much more slowly, or will not be able to run at all, + on CPUs that do not support efficient unaligned memory accesses. + + If unsure what to do here, say N.
+ endmenu # "Platform type" menu "Kernel features" diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata index f5c432b005e77a46b4cdc1ed3f8b8ae160d2b1a0..910ba8837add866f622fba84f7c0c3535ee175e6 100644 --- a/arch/riscv/Kconfig.errata +++ b/arch/riscv/Kconfig.errata @@ -98,6 +98,7 @@ config ERRATA_THEAD_CMO depends on ERRATA_THEAD && MMU select DMA_DIRECT_REMAP select RISCV_DMA_NONCOHERENT + select RISCV_NONSTANDARD_CACHE_OPS default y help This will apply the cache management errata to handle the diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index a74be78678eb0bcabf3d9571669a125401adb64d..0b7d109258e7d850846bb3c5f084a0482f07d02b 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -43,8 +43,7 @@ else KBUILD_LDFLAGS += -melf32lriscv endif -ifeq ($(CONFIG_LD_IS_LLD),y) -ifeq ($(call test-lt, $(CONFIG_LLD_VERSION), 150000),y) +ifndef CONFIG_RISCV_USE_LINKER_RELAXATION KBUILD_CFLAGS += -mno-relax KBUILD_AFLAGS += -mno-relax ifndef CONFIG_AS_IS_LLVM @@ -52,7 +51,6 @@ ifndef CONFIG_AS_IS_LLVM KBUILD_AFLAGS += -Wa,-mno-relax endif endif -endif ifeq ($(CONFIG_SHADOW_CALL_STACK),y) KBUILD_LDFLAGS += --no-relax-gp @@ -108,7 +106,9 @@ KBUILD_AFLAGS_MODULE += $(call as-option,-Wa$(comma)-mno-relax) # unaligned accesses. While unaligned accesses are explicitly allowed in the # RISC-V ISA, they're emulated by machine mode traps on all extant # architectures. It's faster to have GCC emit only aligned accesses. 
+ifneq ($(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS),y) KBUILD_CFLAGS += $(call cc-option,-mstrict-align) +endif ifeq ($(CONFIG_STACKPROTECTOR_PER_TASK),y) prepare: stack_protector_prepare @@ -163,6 +163,8 @@ BOOT_TARGETS := Image Image.gz loader loader.bin xipImage vmlinuz.efi all: $(notdir $(KBUILD_IMAGE)) +loader.bin: loader +Image.gz loader vmlinuz.efi: Image $(BOOT_TARGETS): vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ @$(kecho) ' Kernel: $(boot)/$@ is ready' diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 905881282a7cd115fa222a68faab57545e868e10..eaf34e871e308f0db7a0a578b34940d8d551b163 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -149,6 +149,7 @@ CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_DW=y CONFIG_SERIAL_OF_PLATFORM=y CONFIG_SERIAL_SH_SCI=y +CONFIG_SERIAL_EARLYCON_RISCV_SBI=y CONFIG_VIRTIO_CONSOLE=y CONFIG_HW_RANDOM=y CONFIG_HW_RANDOM_VIRTIO=y diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c index 0554ed4bf087cf6cd06dc2967c0c2c38e9784887..b1c410bbc1aece3c1fe0bea8cbd68271c8c0e29a 100644 --- a/arch/riscv/errata/thead/errata.c +++ b/arch/riscv/errata/thead/errata.c @@ -12,8 +12,10 @@ #include #include #include +#include #include #include +#include #include #include @@ -33,6 +35,69 @@ static bool errata_probe_pbmt(unsigned int stage, return false; } +/* + * th.dcache.ipa rs1 (invalidate, physical address) + * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | + * 0000001 01010 rs1 000 00000 0001011 + * th.dcache.iva rs1 (invalidate, virtual address) + * 0000001 00110 rs1 000 00000 0001011 + * + * th.dcache.cpa rs1 (clean, physical address) + * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | + * 0000001 01001 rs1 000 00000 0001011 + * th.dcache.cva rs1 (clean, virtual address) + * 0000001 00101 rs1 000 00000 0001011 + * + * th.dcache.cipa rs1 (clean then invalidate, physical address) + * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 
- 0 | + * 0000001 01011 rs1 000 00000 0001011 + * th.dcache.civa rs1 (clean then invalidate, virtual address) + * 0000001 00111 rs1 000 00000 0001011 + * + * th.sync.s (make sure all cache operations finished) + * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | + * 0000000 11001 00000 000 00000 0001011 + */ +#define THEAD_INVAL_A0 ".long 0x02a5000b" +#define THEAD_CLEAN_A0 ".long 0x0295000b" +#define THEAD_FLUSH_A0 ".long 0x02b5000b" +#define THEAD_SYNC_S ".long 0x0190000b" + +#define THEAD_CMO_OP(_op, _start, _size, _cachesize) \ +asm volatile("mv a0, %1\n\t" \ + "j 2f\n\t" \ + "3:\n\t" \ + THEAD_##_op##_A0 "\n\t" \ + "add a0, a0, %0\n\t" \ + "2:\n\t" \ + "bltu a0, %2, 3b\n\t" \ + THEAD_SYNC_S \ + : : "r"(_cachesize), \ + "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)), \ + "r"((unsigned long)(_start) + (_size)) \ + : "a0") + +static void thead_errata_cache_inv(phys_addr_t paddr, size_t size) +{ + THEAD_CMO_OP(INVAL, paddr, size, riscv_cbom_block_size); +} + +static void thead_errata_cache_wback(phys_addr_t paddr, size_t size) +{ + THEAD_CMO_OP(CLEAN, paddr, size, riscv_cbom_block_size); +} + +static void thead_errata_cache_wback_inv(phys_addr_t paddr, size_t size) +{ + THEAD_CMO_OP(FLUSH, paddr, size, riscv_cbom_block_size); +} + +static const struct riscv_nonstd_cache_ops thead_errata_cmo_ops = { + .wback = &thead_errata_cache_wback, + .inv = &thead_errata_cache_inv, + .wback_inv = &thead_errata_cache_wback_inv, +}; + static bool errata_probe_cmo(unsigned int stage, unsigned long arch_id, unsigned long impid) { @@ -48,6 +113,7 @@ static bool errata_probe_cmo(unsigned int stage, if (stage == RISCV_ALTERNATIVES_BOOT) { riscv_cbom_block_size = L1_CACHE_BYTES; riscv_noncoherent_supported(); + riscv_noncoherent_register_cache_ops(&thead_errata_cmo_ops); } return true; @@ -77,8 +143,7 @@ static u32 thead_errata_probe(unsigned int stage, if (errata_probe_pbmt(stage, archid, impid)) cpu_req_errata |= BIT(ERRATA_THEAD_PBMT); - if (errata_probe_cmo(stage, 
archid, impid)) - cpu_req_errata |= BIT(ERRATA_THEAD_CMO); + errata_probe_cmo(stage, archid, impid); if (errata_probe_pmu(stage, archid, impid)) cpu_req_errata |= BIT(ERRATA_THEAD_PMU); diff --git a/arch/riscv/include/asm/arch_hweight.h b/arch/riscv/include/asm/arch_hweight.h new file mode 100644 index 0000000000000000000000000000000000000000..c20236a0725b9e27a31d28b580db1bd1ad2c945a --- /dev/null +++ b/arch/riscv/include/asm/arch_hweight.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Based on arch/x86/include/asm/arch_hweight.h + */ + +#ifndef _ASM_RISCV_HWEIGHT_H +#define _ASM_RISCV_HWEIGHT_H + +#include +#include + +#if (BITS_PER_LONG == 64) +#define CPOPW "cpopw " +#elif (BITS_PER_LONG == 32) +#define CPOPW "cpop " +#else +#error "Unexpected BITS_PER_LONG" +#endif + +static __always_inline unsigned int __arch_hweight32(unsigned int w) +{ +#ifdef CONFIG_RISCV_ISA_ZBB + asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0, + RISCV_ISA_EXT_ZBB, 1) + : : : : legacy); + + asm (".option push\n" + ".option arch,+zbb\n" + CPOPW "%0, %0\n" + ".option pop\n" + : "+r" (w) : :); + + return w; + +legacy: +#endif + return __sw_hweight32(w); +} + +static inline unsigned int __arch_hweight16(unsigned int w) +{ + return __arch_hweight32(w & 0xffff); +} + +static inline unsigned int __arch_hweight8(unsigned int w) +{ + return __arch_hweight32(w & 0xff); +} + +#if BITS_PER_LONG == 64 +static __always_inline unsigned long __arch_hweight64(__u64 w) +{ +# ifdef CONFIG_RISCV_ISA_ZBB + asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0, + RISCV_ISA_EXT_ZBB, 1) + : : : : legacy); + + asm (".option push\n" + ".option arch,+zbb\n" + "cpop %0, %0\n" + ".option pop\n" + : "+r" (w) : :); + + return w; + +legacy: +# endif + return __sw_hweight64(w); +} +#else /* BITS_PER_LONG == 64 */ +static inline unsigned long __arch_hweight64(__u64 w) +{ + return __arch_hweight32((u32)w) + + __arch_hweight32((u32)(w >> 32)); +} +#endif /* !(BITS_PER_LONG == 64) */ + +#endif /* 
_ASM_RISCV_HWEIGHT_H */ diff --git a/arch/riscv/include/asm/archrandom.h b/arch/riscv/include/asm/archrandom.h new file mode 100644 index 0000000000000000000000000000000000000000..5345360adfb9cf183b831e41191a828bc87049b5 --- /dev/null +++ b/arch/riscv/include/asm/archrandom.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Kernel interface for the RISCV arch_random_* functions + * + * Copyright (c) 2023 Rivos Inc. + * + */ + +#ifndef ASM_RISCV_ARCHRANDOM_H +#define ASM_RISCV_ARCHRANDOM_H + +#include +#include + +#define SEED_RETRY_LOOPS 100 + +static inline bool __must_check csr_seed_long(unsigned long *v) +{ + unsigned int retry = SEED_RETRY_LOOPS, valid_seeds = 0; + const int needed_seeds = sizeof(long) / sizeof(u16); + u16 *entropy = (u16 *)v; + + do { + /* + * The SEED CSR must be accessed with a read-write instruction. + */ + unsigned long csr_seed = csr_swap(CSR_SEED, 0); + unsigned long opst = csr_seed & SEED_OPST_MASK; + + switch (opst) { + case SEED_OPST_ES16: + entropy[valid_seeds++] = csr_seed & SEED_ENTROPY_MASK; + if (valid_seeds == needed_seeds) + return true; + break; + + case SEED_OPST_DEAD: + pr_err_once("archrandom: Unrecoverable error\n"); + return false; + + case SEED_OPST_BIST: + case SEED_OPST_WAIT: + default: + cpu_relax(); + continue; + } + } while (--retry); + + return false; +} + +static inline size_t __must_check arch_get_random_longs(unsigned long *v, size_t max_longs) +{ + return 0; +} + +static inline size_t __must_check arch_get_random_seed_longs(unsigned long *v, size_t max_longs) +{ + if (!max_longs) + return 0; + + /* + * If Zkr is supported and csr_seed_long succeeds, we return one long + * worth of entropy. 
+ */ + if (riscv_has_extension_likely(RISCV_ISA_EXT_ZKR) && csr_seed_long(v)) + return 1; + + return 0; +} + +#endif /* ASM_RISCV_ARCHRANDOM_H */ diff --git a/arch/riscv/include/asm/asm-extable.h b/arch/riscv/include/asm/asm-extable.h index 00a96e7a966445175a687d481efa29f6862b3675..0c8bfd54fc4e05beec2fed22fc7f73ddcc997ab7 100644 --- a/arch/riscv/include/asm/asm-extable.h +++ b/arch/riscv/include/asm/asm-extable.h @@ -6,6 +6,7 @@ #define EX_TYPE_FIXUP 1 #define EX_TYPE_BPF 2 #define EX_TYPE_UACCESS_ERR_ZERO 3 +#define EX_TYPE_LOAD_UNALIGNED_ZEROPAD 4 #ifdef CONFIG_MMU @@ -47,6 +48,11 @@ #define EX_DATA_REG_ZERO_SHIFT 5 #define EX_DATA_REG_ZERO GENMASK(9, 5) +#define EX_DATA_REG_DATA_SHIFT 0 +#define EX_DATA_REG_DATA GENMASK(4, 0) +#define EX_DATA_REG_ADDR_SHIFT 5 +#define EX_DATA_REG_ADDR GENMASK(9, 5) + #define EX_DATA_REG(reg, gpr) \ "((.L__gpr_num_" #gpr ") << " __stringify(EX_DATA_REG_##reg##_SHIFT) ")" @@ -62,6 +68,15 @@ #define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err) \ _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero) +#define _ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(insn, fixup, data, addr) \ + __DEFINE_ASM_GPR_NUMS \ + __ASM_EXTABLE_RAW(#insn, #fixup, \ + __stringify(EX_TYPE_LOAD_UNALIGNED_ZEROPAD), \ + "(" \ + EX_DATA_REG(DATA, data) " | " \ + EX_DATA_REG(ADDR, addr) \ + ")") + #endif /* __ASSEMBLY__ */ #else /* CONFIG_MMU */ diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h index 36b955c762ba08e92ca0441ee8fbae9219c1f2fa..cd627ec289f163a630b73dd03dd52a6b28692997 100644 --- a/arch/riscv/include/asm/asm-prototypes.h +++ b/arch/riscv/include/asm/asm-prototypes.h @@ -9,6 +9,33 @@ long long __lshrti3(long long a, int b); long long __ashrti3(long long a, int b); long long __ashlti3(long long a, int b); +#ifdef CONFIG_RISCV_ISA_V + +#ifdef CONFIG_MMU +asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n); +#endif /* CONFIG_MMU */ + +void xor_regs_2_(unsigned long bytes, unsigned long *__restrict p1, + 
const unsigned long *__restrict p2); +void xor_regs_3_(unsigned long bytes, unsigned long *__restrict p1, + const unsigned long *__restrict p2, + const unsigned long *__restrict p3); +void xor_regs_4_(unsigned long bytes, unsigned long *__restrict p1, + const unsigned long *__restrict p2, + const unsigned long *__restrict p3, + const unsigned long *__restrict p4); +void xor_regs_5_(unsigned long bytes, unsigned long *__restrict p1, + const unsigned long *__restrict p2, + const unsigned long *__restrict p3, + const unsigned long *__restrict p4, + const unsigned long *__restrict p5); + +#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE +asmlinkage void riscv_v_context_nesting_start(struct pt_regs *regs); +asmlinkage void riscv_v_context_nesting_end(struct pt_regs *regs); +#endif /* CONFIG_RISCV_ISA_V_PREEMPTIVE */ + +#endif /* CONFIG_RISCV_ISA_V */ #define DECLARE_DO_ERROR_INFO(name) asmlinkage void name(struct pt_regs *regs) diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h index 224b4dc02b50bc6761cbef064445e472ef053ce2..9ffc355370248aed22dbe690ba1cde8e682a3588 100644 --- a/arch/riscv/include/asm/bitops.h +++ b/arch/riscv/include/asm/bitops.h @@ -271,7 +271,9 @@ legacy: #include #include -#include +#include + +#include #if (BITS_PER_LONG == 64) #define __AMO(op) "amo" #op ".d" diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h index 3cb53c4df27cfe4772f2f74623fc861f6a72360f..a129dac4521d35d69af22f713f5cdad27edf7f86 100644 --- a/arch/riscv/include/asm/cacheflush.h +++ b/arch/riscv/include/asm/cacheflush.h @@ -37,7 +37,8 @@ static inline void flush_dcache_page(struct page *page) flush_icache_mm(vma->vm_mm, 0) #ifdef CONFIG_64BIT -#define flush_cache_vmap(start, end) flush_tlb_kernel_range(start, end) +#define flush_cache_vmap(start, end) flush_tlb_kernel_range(start, end) +#define flush_cache_vmap_early(start, end) local_flush_tlb_kernel_range(start, end) #endif #ifndef CONFIG_SMP diff --git 
a/arch/riscv/include/asm/checksum.h b/arch/riscv/include/asm/checksum.h new file mode 100644 index 0000000000000000000000000000000000000000..a5b60b54b101c3ba1e550b3e16e7096a6bfc8357 --- /dev/null +++ b/arch/riscv/include/asm/checksum.h @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Checksum routines + * + * Copyright (C) 2023 Rivos Inc. + */ +#ifndef __ASM_RISCV_CHECKSUM_H +#define __ASM_RISCV_CHECKSUM_H + +#include +#include + +#define ip_fast_csum ip_fast_csum + +extern unsigned int do_csum(const unsigned char *buff, int len); +#define do_csum do_csum + +/* Default version is sufficient for 32 bit */ +#ifndef CONFIG_32BIT +#define _HAVE_ARCH_IPV6_CSUM +__sum16 csum_ipv6_magic(const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u32 len, __u8 proto, __wsum sum); +#endif + +/* Define riscv versions of functions before importing asm-generic/checksum.h */ +#include + +/** + * Quickly compute an IP checksum with the assumption that IPv4 headers will + * always be in multiples of 32-bits, and have an ihl of at least 5. + * + * @ihl: the number of 32 bit segments and must be greater than or equal to 5. + * @iph: assumed to be word aligned given that NET_IP_ALIGN is set to 2 on + * riscv, defining IP headers to be aligned. + */ +static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) +{ + unsigned long csum = 0; + int pos = 0; + + do { + csum += ((const unsigned int *)iph)[pos]; + if (IS_ENABLED(CONFIG_32BIT)) + csum += csum < ((const unsigned int *)iph)[pos]; + } while (++pos < ihl); + + /* + * ZBB only saves three instructions on 32-bit and five on 64-bit so not + * worth checking if supported without Alternatives. 
+ */ + if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && + IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { + unsigned long fold_temp; + + asm_volatile_goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0, + RISCV_ISA_EXT_ZBB, 1) + : + : + : + : no_zbb); + + if (IS_ENABLED(CONFIG_32BIT)) { + asm(".option push \n\ + .option arch,+zbb \n\ + not %[fold_temp], %[csum] \n\ + rori %[csum], %[csum], 16 \n\ + sub %[csum], %[fold_temp], %[csum] \n\ + .option pop" + : [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)); + } else { + asm(".option push \n\ + .option arch,+zbb \n\ + rori %[fold_temp], %[csum], 32 \n\ + add %[csum], %[fold_temp], %[csum] \n\ + srli %[csum], %[csum], 32 \n\ + not %[fold_temp], %[csum] \n\ + roriw %[csum], %[csum], 16 \n\ + subw %[csum], %[fold_temp], %[csum] \n\ + .option pop" + : [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)); + } + return (__force __sum16)(csum >> 16); + } +no_zbb: +#ifndef CONFIG_32BIT + csum += ror64(csum, 32); + csum >>= 32; +#endif + return csum_fold((__force __wsum)csum); +} + +#endif /* __ASM_RISCV_CHECKSUM_H */ diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h index fbdde8b8a47edf7c1c2fae9249dd1712471c01dc..5a626ed2c47a8915b3848df2e7f4a7ea0601bd71 100644 --- a/arch/riscv/include/asm/cpufeature.h +++ b/arch/riscv/include/asm/cpufeature.h @@ -135,4 +135,6 @@ static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsi return __riscv_isa_extension_available(hart_isa[cpu].isa, ext); } +DECLARE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key); + #endif diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h index 306a19a5509c10e63663330b04e1118abb054b74..510014051f5dbb1aa61098e4974e7e7ac02145ee 100644 --- a/arch/riscv/include/asm/csr.h +++ b/arch/riscv/include/asm/csr.h @@ -411,6 +411,15 @@ #define CSR_VTYPE 0xc21 #define CSR_VLENB 0xc22 +/* Scalar Crypto Extension - Entropy */ +#define CSR_SEED 0x015 +#define SEED_OPST_MASK _AC(0xC0000000, UL) +#define SEED_OPST_BIST 
_AC(0x00000000, UL) +#define SEED_OPST_WAIT _AC(0x40000000, UL) +#define SEED_OPST_ES16 _AC(0x80000000, UL) +#define SEED_OPST_DEAD _AC(0xC0000000, UL) +#define SEED_ENTROPY_MASK _AC(0xFFFF, UL) + #ifdef CONFIG_RISCV_M_MODE # define CSR_STATUS CSR_MSTATUS # define CSR_IE CSR_MIE diff --git a/arch/riscv/include/asm/entry-common.h b/arch/riscv/include/asm/entry-common.h index 7ab5e34318c85fe05df525a5f80a49d25051bcd7..2293e535f8659af02ef2e52ce1752827c415532e 100644 --- a/arch/riscv/include/asm/entry-common.h +++ b/arch/riscv/include/asm/entry-common.h @@ -4,6 +4,23 @@ #define _ASM_RISCV_ENTRY_COMMON_H #include +#include +#include + +static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, + unsigned long ti_work) +{ + if (ti_work & _TIF_RISCV_V_DEFER_RESTORE) { + clear_thread_flag(TIF_RISCV_V_DEFER_RESTORE); + /* + * We are already called with irq disabled, so go without + * keeping track of riscv_v_flags. + */ + riscv_v_vstate_restore(&current->thread.vstate, regs); + } +} + +#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare void handle_page_fault(struct pt_regs *regs); void handle_break(struct pt_regs *regs); diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h index 83ed25e4355343c25101882b7c0b31cf462af542..ea33288f8a25b4f76e59bd65e8f869ee842c6e14 100644 --- a/arch/riscv/include/asm/errata_list.h +++ b/arch/riscv/include/asm/errata_list.h @@ -24,9 +24,8 @@ #ifdef CONFIG_ERRATA_THEAD #define ERRATA_THEAD_PBMT 0 -#define ERRATA_THEAD_CMO 1 -#define ERRATA_THEAD_PMU 2 -#define ERRATA_THEAD_NUMBER 3 +#define ERRATA_THEAD_PMU 1 +#define ERRATA_THEAD_NUMBER 2 #endif #ifdef __ASSEMBLY__ @@ -94,54 +93,17 @@ asm volatile(ALTERNATIVE( \ #define ALT_THEAD_PMA(_val) #endif -/* - * th.dcache.ipa rs1 (invalidate, physical address) - * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | - * 0000001 01010 rs1 000 00000 0001011 - * th.dache.iva rs1 (invalida, virtual address) - * 0000001 00110 rs1 000 00000
0001011 - * - * th.dcache.cpa rs1 (clean, physical address) - * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | - * 0000001 01001 rs1 000 00000 0001011 - * th.dcache.cva rs1 (clean, virtual address) - * 0000001 00101 rs1 000 00000 0001011 - * - * th.dcache.cipa rs1 (clean then invalidate, physical address) - * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | - * 0000001 01011 rs1 000 00000 0001011 - * th.dcache.civa rs1 (... virtual address) - * 0000001 00111 rs1 000 00000 0001011 - * - * th.sync.s (make sure all cache operations finished) - * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | - * 0000000 11001 00000 000 00000 0001011 - */ -#define THEAD_INVAL_A0 ".long 0x0265000b" -#define THEAD_CLEAN_A0 ".long 0x0255000b" -#define THEAD_FLUSH_A0 ".long 0x0275000b" -#define THEAD_SYNC_S ".long 0x0190000b" - #define ALT_CMO_OP(_op, _start, _size, _cachesize) \ -asm volatile(ALTERNATIVE_2( \ - __nops(6), \ +asm volatile(ALTERNATIVE( \ + __nops(5), \ "mv a0, %1\n\t" \ "j 2f\n\t" \ "3:\n\t" \ CBO_##_op(a0) \ "add a0, a0, %0\n\t" \ "2:\n\t" \ - "bltu a0, %2, 3b\n\t" \ - "nop", 0, RISCV_ISA_EXT_ZICBOM, CONFIG_RISCV_ISA_ZICBOM, \ - "mv a0, %1\n\t" \ - "j 2f\n\t" \ - "3:\n\t" \ - THEAD_##_op##_A0 "\n\t" \ - "add a0, a0, %0\n\t" \ - "2:\n\t" \ - "bltu a0, %2, 3b\n\t" \ - THEAD_SYNC_S, THEAD_VENDOR_ID, \ - ERRATA_THEAD_CMO, CONFIG_ERRATA_THEAD_CMO) \ + "bltu a0, %2, 3b\n\t", \ + 0, RISCV_ISA_EXT_ZICBOM, CONFIG_RISCV_ISA_ZICBOM) \ : : "r"(_cachesize), \ "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)), \ "r"((unsigned long)(_start) + (_size)) \ diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h index 2b2f5df7ef2c7de42216b4166ae3d1f4a789731f..3291721229523456247532009bc2ed2ddc444540 100644 --- a/arch/riscv/include/asm/ftrace.h +++ b/arch/riscv/include/asm/ftrace.h @@ -128,7 +128,23 @@ do { \ struct dyn_ftrace; int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec); #define ftrace_init_nop ftrace_init_nop 
-#endif + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +struct ftrace_ops; +struct ftrace_regs; +void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs); +#define ftrace_graph_func ftrace_graph_func + +static inline void __arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr) +{ + regs->t1 = addr; +} +#define arch_ftrace_set_direct_caller(fregs, addr) \ + __arch_ftrace_set_direct_caller(&(fregs)->regs, addr) +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ + +#endif /* __ASSEMBLY__ */ #endif /* CONFIG_DYNAMIC_FTRACE */ diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index e3ffef1c61193228c3b2f72794bb6289c188f57d..0c94260b5d0c126f6302f39a59507f19eed48dac 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -865,7 +865,7 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) #define TASK_SIZE_MIN (PGDIR_SIZE_L3 * PTRS_PER_PGD / 2) #ifdef CONFIG_COMPAT -#define TASK_SIZE_32 (_AC(0x80000000, UL) - PAGE_SIZE) +#define TASK_SIZE_32 (_AC(0x80000000, UL)) #define TASK_SIZE (test_thread_flag(TIF_32BIT) ? \ TASK_SIZE_32 : TASK_SIZE_64) #else diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index f19f861cda549014eee042efb651709f5da00475..a8509cc31ab25a5dcc75765bdb99e43e87dded3b 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -16,7 +16,7 @@ #ifdef CONFIG_64BIT #define DEFAULT_MAP_WINDOW (UL(1) << (MMAP_VA_BITS - 1)) -#define STACK_TOP_MAX TASK_SIZE_64 +#define STACK_TOP_MAX TASK_SIZE #define arch_get_mmap_end(addr, len, flags) \ ({ \ @@ -73,6 +73,43 @@ struct task_struct; struct pt_regs; +/* + * We use a flag to track in-kernel Vector context. Currently the flag has the + * following meaning: + * + * - bit 0: indicates whether the in-kernel Vector context is active. The + * activation of this state disables the preemption. 
On a non-RT kernel, it + * also disables bh. + * - bit 8: is used for tracking preemptible kernel-mode Vector, when + * RISCV_ISA_V_PREEMPTIVE is enabled. Calling kernel_vector_begin() does not + * disable the preemption if the thread's kernel_vstate.datap is allocated. + * Instead, the kernel sets this bit field. Then the trap entry/exit code + * knows if we are entering/exiting the context that owns preempt_v. + * - 0: the task is not using preempt_v + * - 1: the task is actively using preempt_v. But whether the task owns + * the preempt_v context is decided by bits in RISCV_V_CTX_DEPTH_MASK. + * - bits 16-23 are RISCV_V_CTX_DEPTH_MASK, used by context tracking routine + * when preempt_v starts: + * - 0: the task is actively using, and owns preempt_v context. + * - non-zero: the task was using preempt_v, but then took a trap within. + * Thus, the task does not own preempt_v. Any use of Vector will have to + * save preempt_v, if dirty, and fall back to non-preemptible kernel-mode + * Vector. + * - bit 30: The in-kernel preempt_v context is saved, and needs to be + * restored when returning to the context that owns the preempt_v. + * - bit 31: The in-kernel preempt_v context is dirty, as signaled by the + * trap entry code. Any context switches out-of current task need to save + * it to the task's in-kernel V context. Also, any traps nesting on-top-of + * preempt_v requesting to use V needs a save.
+ */ +#define RISCV_V_CTX_DEPTH_MASK 0x00ff0000 + +#define RISCV_V_CTX_UNIT_DEPTH 0x00010000 +#define RISCV_KERNEL_MODE_V 0x00000001 +#define RISCV_PREEMPT_V 0x00000100 +#define RISCV_PREEMPT_V_DIRTY 0x80000000 +#define RISCV_PREEMPT_V_NEED_RESTORE 0x40000000 + /* CPU-specific state of a task */ struct thread_struct { /* Callee-saved registers */ @@ -81,9 +118,11 @@ struct thread_struct { unsigned long s[12]; /* s[0]: frame pointer */ struct __riscv_d_ext_state fstate; unsigned long bad_cause; - unsigned long vstate_ctrl; + u32 riscv_v_flags; + u32 vstate_ctrl; struct __riscv_v_ext_state vstate; unsigned long align_ctl; + struct __riscv_v_ext_state kernel_vstate; }; /* Whitelist the fstate from the task_struct for hardened usercopy */ diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index b6f898c56940a2d3626d90436185279d01f288b8..6e68f8dff76bc6d09f7a5e555e54474587021ed9 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -29,6 +29,7 @@ enum sbi_ext_id { SBI_EXT_RFENCE = 0x52464E43, SBI_EXT_HSM = 0x48534D, SBI_EXT_SRST = 0x53525354, + SBI_EXT_SUSP = 0x53555350, SBI_EXT_PMU = 0x504D55, SBI_EXT_DBCN = 0x4442434E, SBI_EXT_STA = 0x535441, @@ -115,6 +116,14 @@ enum sbi_srst_reset_reason { SBI_SRST_RESET_REASON_SYS_FAILURE, }; +enum sbi_ext_susp_fid { + SBI_EXT_SUSP_SYSTEM_SUSPEND = 0, +}; + +enum sbi_ext_susp_sleep_type { + SBI_SUSP_SLEEP_TYPE_SUSPEND_TO_RAM = 0, +}; + enum sbi_ext_pmu_fid { SBI_EXT_PMU_NUM_COUNTERS = 0, SBI_EXT_PMU_COUNTER_GET_INFO, @@ -288,8 +297,13 @@ struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0, unsigned long arg3, unsigned long arg4, unsigned long arg5); +#ifdef CONFIG_RISCV_SBI_V01 void sbi_console_putchar(int ch); int sbi_console_getchar(void); +#else +static inline void sbi_console_putchar(int ch) { } +static inline int sbi_console_getchar(void) { return -ENOENT; } +#endif long sbi_get_mvendorid(void); long sbi_get_marchid(void); long sbi_get_mimpid(void); @@ -346,6 +360,11 @@ 
static inline unsigned long sbi_mk_version(unsigned long major, } int sbi_err_map_linux_errno(int err); + +extern bool sbi_debug_console_available; +int sbi_debug_console_write(const char *bytes, unsigned int num_bytes); +int sbi_debug_console_read(char *bytes, unsigned int num_bytes); + #else /* CONFIG_RISCV_SBI */ static inline int sbi_remote_fence_i(const struct cpumask *cpu_mask) { return -1; } static inline void sbi_init(void) {} diff --git a/arch/riscv/include/asm/simd.h b/arch/riscv/include/asm/simd.h new file mode 100644 index 0000000000000000000000000000000000000000..54efbf523d49c67d75921c7f8454efc87ad0f257 --- /dev/null +++ b/arch/riscv/include/asm/simd.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2017 Linaro Ltd. + * Copyright (C) 2023 SiFive + */ + +#ifndef __ASM_SIMD_H +#define __ASM_SIMD_H + +#include +#include +#include +#include +#include +#include + +#include + +#ifdef CONFIG_RISCV_ISA_V +/* + * may_use_simd - whether it is allowable at this time to issue vector + * instructions or access the vector register file + * + * Callers must not assume that the result remains true beyond the next + * preempt_enable() or return from softirq context. + */ +static __must_check inline bool may_use_simd(void) +{ + /* + * RISCV_KERNEL_MODE_V is only set while preemption is disabled, + * and is clear whenever preemption is enabled. + */ + if (in_hardirq() || in_nmi()) + return false; + + /* + * Nesting is achieved in preempt_v by spreading the control for + * preemptible and non-preemptible kernel-mode Vector into two fields. + * Always try to match with preempt_v if kernel V-context exists. Then, + * fallback to check non preempt_v if nesting happens, or if the config + * is not set. + */ + if (IS_ENABLED(CONFIG_RISCV_ISA_V_PREEMPTIVE) && current->thread.kernel_vstate.datap) { + if (!riscv_preempt_v_started(current)) + return true; + } + /* + * Non-preemptible kernel-mode Vector temporarily disables bh.
So we + * must not return true on irqs_disabled(). Otherwise we would fail the + * lockdep check calling local_bh_enable() + */ + return !irqs_disabled() && !(riscv_v_flags() & RISCV_KERNEL_MODE_V); +} + +#else /* ! CONFIG_RISCV_ISA_V */ + +static __must_check inline bool may_use_simd(void) +{ + return false; +} + +#endif /* ! CONFIG_RISCV_ISA_V */ + +#endif diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h index f90d8e42f3c7911908ec1f5f19929ab5ba67ff3a..7efdb0584d47ac9887126a00dcfcc045619b27b5 100644 --- a/arch/riscv/include/asm/switch_to.h +++ b/arch/riscv/include/asm/switch_to.h @@ -53,8 +53,7 @@ static inline void __switch_to_fpu(struct task_struct *prev, struct pt_regs *regs; regs = task_pt_regs(prev); - if (unlikely(regs->status & SR_SD)) - fstate_save(prev, regs); + fstate_save(prev, regs); fstate_restore(next, task_pt_regs(next)); } diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h index 4856697c5f25a99d680c57e40b280372fa26509b..5d473343634b9d3af3c1f1872da25e6e60f77162 100644 --- a/arch/riscv/include/asm/thread_info.h +++ b/arch/riscv/include/asm/thread_info.h @@ -102,12 +102,14 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); #define TIF_NOTIFY_SIGNAL 9 /* signal notifications exist */ #define TIF_UPROBE 10 /* uprobe breakpoint or singlestep */ #define TIF_32BIT 11 /* compat-mode 32bit process */ +#define TIF_RISCV_V_DEFER_RESTORE 12 /* restore Vector before returning to user */ #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) #define _TIF_UPROBE (1 << TIF_UPROBE) +#define _TIF_RISCV_V_DEFER_RESTORE (1 << TIF_RISCV_V_DEFER_RESTORE) #define _TIF_WORK_MASK \ (_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED | \ diff --git a/arch/riscv/include/asm/tlbbatch.h b/arch/riscv/include/asm/tlbbatch.h new file
mode 100644 index 0000000000000000000000000000000000000000..46014f70b9daa19348e6b9d7162b5628dbd1e7b9 --- /dev/null +++ b/arch/riscv/include/asm/tlbbatch.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 Rivos Inc. + */ + +#ifndef _ASM_RISCV_TLBBATCH_H +#define _ASM_RISCV_TLBBATCH_H + +#include + +struct arch_tlbflush_unmap_batch { + struct cpumask cpumask; +}; + +#endif /* _ASM_RISCV_TLBBATCH_H */ diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 8f3418c5f1724ba45e412ca52e0ef59ba0140638..928f096dca21b4e6cbafc009595cd34bb9917109 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -41,11 +41,20 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr); void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); void flush_tlb_kernel_range(unsigned long start, unsigned long end); +void local_flush_tlb_kernel_range(unsigned long start, unsigned long end); #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); #endif + +bool arch_tlbbatch_should_defer(struct mm_struct *mm); +void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch, + struct mm_struct *mm, + unsigned long uaddr); +void arch_flush_tlb_batched_pending(struct mm_struct *mm); +void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch); + #else /* CONFIG_SMP && CONFIG_MMU */ #define flush_tlb_all() local_flush_tlb_all() diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h index 87aaef656257cbde40331aadaf1cb0b1ea374455..0cd6f0a027d1f7ae7bb95b509bad3400c9fa71a5 100644 --- a/arch/riscv/include/asm/vector.h +++ b/arch/riscv/include/asm/vector.h @@ -22,6 +22,18 @@ extern unsigned long riscv_v_vsize; int riscv_v_setup_vsize(void); bool riscv_v_first_use_handler(struct pt_regs *regs); +void 
kernel_vector_begin(void); +void kernel_vector_end(void); +void get_cpu_vector_context(void); +void put_cpu_vector_context(void); +void riscv_v_thread_free(struct task_struct *tsk); +void __init riscv_v_setup_ctx_cache(void); +void riscv_v_thread_alloc(struct task_struct *tsk); + +static inline u32 riscv_v_flags(void) +{ + return READ_ONCE(current->thread.riscv_v_flags); +} static __always_inline bool has_vector(void) { @@ -162,36 +174,89 @@ static inline void riscv_v_vstate_discard(struct pt_regs *regs) __riscv_v_vstate_dirty(regs); } -static inline void riscv_v_vstate_save(struct task_struct *task, +static inline void riscv_v_vstate_save(struct __riscv_v_ext_state *vstate, struct pt_regs *regs) { if ((regs->status & SR_VS) == SR_VS_DIRTY) { - struct __riscv_v_ext_state *vstate = &task->thread.vstate; - __riscv_v_vstate_save(vstate, vstate->datap); __riscv_v_vstate_clean(regs); } } -static inline void riscv_v_vstate_restore(struct task_struct *task, +static inline void riscv_v_vstate_restore(struct __riscv_v_ext_state *vstate, struct pt_regs *regs) { if ((regs->status & SR_VS) != SR_VS_OFF) { - struct __riscv_v_ext_state *vstate = &task->thread.vstate; - __riscv_v_vstate_restore(vstate, vstate->datap); __riscv_v_vstate_clean(regs); } } +static inline void riscv_v_vstate_set_restore(struct task_struct *task, + struct pt_regs *regs) +{ + if ((regs->status & SR_VS) != SR_VS_OFF) { + set_tsk_thread_flag(task, TIF_RISCV_V_DEFER_RESTORE); + riscv_v_vstate_on(regs); + } +} + +#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE +static inline bool riscv_preempt_v_dirty(struct task_struct *task) +{ + return !!(task->thread.riscv_v_flags & RISCV_PREEMPT_V_DIRTY); +} + +static inline bool riscv_preempt_v_restore(struct task_struct *task) +{ + return !!(task->thread.riscv_v_flags & RISCV_PREEMPT_V_NEED_RESTORE); +} + +static inline void riscv_preempt_v_clear_dirty(struct task_struct *task) +{ + barrier(); + task->thread.riscv_v_flags &= ~RISCV_PREEMPT_V_DIRTY; +} + +static inline void 
riscv_preempt_v_set_restore(struct task_struct *task) +{ + barrier(); + task->thread.riscv_v_flags |= RISCV_PREEMPT_V_NEED_RESTORE; +} + +static inline bool riscv_preempt_v_started(struct task_struct *task) +{ + return !!(task->thread.riscv_v_flags & RISCV_PREEMPT_V); +} + +#else /* !CONFIG_RISCV_ISA_V_PREEMPTIVE */ +static inline bool riscv_preempt_v_dirty(struct task_struct *task) { return false; } +static inline bool riscv_preempt_v_restore(struct task_struct *task) { return false; } +static inline bool riscv_preempt_v_started(struct task_struct *task) { return false; } +#define riscv_preempt_v_clear_dirty(tsk) do {} while (0) +#define riscv_preempt_v_set_restore(tsk) do {} while (0) +#endif /* CONFIG_RISCV_ISA_V_PREEMPTIVE */ + static inline void __switch_to_vector(struct task_struct *prev, struct task_struct *next) { struct pt_regs *regs; - regs = task_pt_regs(prev); - riscv_v_vstate_save(prev, regs); - riscv_v_vstate_restore(next, task_pt_regs(next)); + if (riscv_preempt_v_started(prev)) { + if (riscv_preempt_v_dirty(prev)) { + __riscv_v_vstate_save(&prev->thread.kernel_vstate, + prev->thread.kernel_vstate.datap); + riscv_preempt_v_clear_dirty(prev); + } + } else { + regs = task_pt_regs(prev); + riscv_v_vstate_save(&prev->thread.vstate, regs); + } + + if (riscv_preempt_v_started(next)) + riscv_preempt_v_set_restore(next); + else + riscv_v_vstate_set_restore(next, task_pt_regs(next)); } void riscv_v_vstate_ctrl_init(struct task_struct *tsk); @@ -208,11 +273,14 @@ static inline bool riscv_v_vstate_query(struct pt_regs *regs) { return false; } static inline bool riscv_v_vstate_ctrl_user_allowed(void) { return false; } #define riscv_v_vsize (0) #define riscv_v_vstate_discard(regs) do {} while (0) -#define riscv_v_vstate_save(task, regs) do {} while (0) -#define riscv_v_vstate_restore(task, regs) do {} while (0) +#define riscv_v_vstate_save(vstate, regs) do {} while (0) +#define riscv_v_vstate_restore(vstate, regs) do {} while (0) #define 
__switch_to_vector(__prev, __next) do {} while (0) #define riscv_v_vstate_off(regs) do {} while (0) #define riscv_v_vstate_on(regs) do {} while (0) +#define riscv_v_thread_free(tsk) do {} while (0) +#define riscv_v_setup_ctx_cache() do {} while (0) +#define riscv_v_thread_alloc(tsk) do {} while (0) #endif /* CONFIG_RISCV_ISA_V */ diff --git a/arch/riscv/include/asm/word-at-a-time.h b/arch/riscv/include/asm/word-at-a-time.h index 7c086ac6ecd4a82e11b9cd7cbd8d6944fb68ae1f..f3f031e34191d6fb993f650126be4dc793fee3ed 100644 --- a/arch/riscv/include/asm/word-at-a-time.h +++ b/arch/riscv/include/asm/word-at-a-time.h @@ -9,6 +9,7 @@ #define _ASM_RISCV_WORD_AT_A_TIME_H +#include #include struct word_at_a_time { @@ -45,4 +46,30 @@ static inline unsigned long find_zero(unsigned long mask) /* The mask we created is directly usable as a bytemask */ #define zero_bytemask(mask) (mask) +#ifdef CONFIG_DCACHE_WORD_ACCESS + +/* + * Load an unaligned word from kernel space. + * + * In the (very unlikely) case of the word being a page-crosser + * and the next page not being mapped, take the exception and + * return zeroes in the non-existing part. 
+ */ +static inline unsigned long load_unaligned_zeropad(const void *addr) +{ + unsigned long ret; + + /* Load word from unaligned pointer addr */ + asm( + "1: " REG_L " %0, %2\n" + "2:\n" + _ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(1b, 2b, %0, %1) + : "=&r" (ret) + : "r" (addr), "m" (*(unsigned long *)addr)); + + return ret; +} + +#endif /* CONFIG_DCACHE_WORD_ACCESS */ + #endif /* _ASM_RISCV_WORD_AT_A_TIME_H */ diff --git a/arch/riscv/include/asm/xor.h b/arch/riscv/include/asm/xor.h new file mode 100644 index 0000000000000000000000000000000000000000..96011861e46b4df24cb973a9066324283ad17b60 --- /dev/null +++ b/arch/riscv/include/asm/xor.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2021 SiFive + */ + +#include +#include +#ifdef CONFIG_RISCV_ISA_V +#include +#include +#include + +static void xor_vector_2(unsigned long bytes, unsigned long *__restrict p1, + const unsigned long *__restrict p2) +{ + kernel_vector_begin(); + xor_regs_2_(bytes, p1, p2); + kernel_vector_end(); +} + +static void xor_vector_3(unsigned long bytes, unsigned long *__restrict p1, + const unsigned long *__restrict p2, + const unsigned long *__restrict p3) +{ + kernel_vector_begin(); + xor_regs_3_(bytes, p1, p2, p3); + kernel_vector_end(); +} + +static void xor_vector_4(unsigned long bytes, unsigned long *__restrict p1, + const unsigned long *__restrict p2, + const unsigned long *__restrict p3, + const unsigned long *__restrict p4) +{ + kernel_vector_begin(); + xor_regs_4_(bytes, p1, p2, p3, p4); + kernel_vector_end(); +} + +static void xor_vector_5(unsigned long bytes, unsigned long *__restrict p1, + const unsigned long *__restrict p2, + const unsigned long *__restrict p3, + const unsigned long *__restrict p4, + const unsigned long *__restrict p5) +{ + kernel_vector_begin(); + xor_regs_5_(bytes, p1, p2, p3, p4, p5); + kernel_vector_end(); +} + +static struct xor_block_template xor_block_rvv = { + .name = "rvv", + .do_2 = xor_vector_2, + .do_3 = 
xor_vector_3, + .do_4 = xor_vector_4, + .do_5 = xor_vector_5 +}; + +#undef XOR_TRY_TEMPLATES +#define XOR_TRY_TEMPLATES \ + do { \ + xor_speed(&xor_block_8regs); \ + xor_speed(&xor_block_32regs); \ + if (has_vector()) { \ + xor_speed(&xor_block_rvv);\ + } \ + } while (0) +#endif diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index c92c623b311e7b78b8e5055f66d780d328025b60..f71910718053d841a361fd97e7d62da4f86bebcf 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -64,6 +64,7 @@ obj-$(CONFIG_MMU) += vdso.o vdso/ obj-$(CONFIG_RISCV_MISALIGNED) += traps_misaligned.o obj-$(CONFIG_FPU) += fpu.o obj-$(CONFIG_RISCV_ISA_V) += vector.o +obj-$(CONFIG_RISCV_ISA_V) += kernel_mode_vector.o obj-$(CONFIG_SMP) += smpboot.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_SMP) += cpu_ops.o diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index e32591e9da90867b6b0760c6f553b5c146fba847..89920f84d0a34385471e9afbf9c26d287cbbd838 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -8,8 +8,10 @@ #include #include +#include #include #include +#include #include #include #include @@ -44,6 +46,8 @@ struct riscv_isainfo hart_isa[NR_CPUS]; /* Performance information */ DEFINE_PER_CPU(long, misaligned_access_speed); +static cpumask_t fast_misaligned_access; + /** * riscv_isa_extension_base() - Get base extension word * @@ -784,6 +788,16 @@ static int check_unaligned_access(void *param) (speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow"); per_cpu(misaligned_access_speed, cpu) = speed; + + /* + * Set the value of fast_misaligned_access of a CPU. These operations + * are atomic to avoid race conditions. 
+ */ + if (speed == RISCV_HWPROBE_MISALIGNED_FAST) + cpumask_set_cpu(cpu, &fast_misaligned_access); + else + cpumask_clear_cpu(cpu, &fast_misaligned_access); + return 0; } @@ -796,13 +810,69 @@ static void check_unaligned_access_nonboot_cpu(void *param) check_unaligned_access(pages[cpu]); } +DEFINE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key); + +static void modify_unaligned_access_branches(cpumask_t *mask, int weight) +{ + if (cpumask_weight(mask) == weight) + static_branch_enable_cpuslocked(&fast_misaligned_access_speed_key); + else + static_branch_disable_cpuslocked(&fast_misaligned_access_speed_key); +} + +static void set_unaligned_access_static_branches_except_cpu(int cpu) +{ + /* + * Same as set_unaligned_access_static_branches, except excludes the + * given CPU from the result. When a CPU is hotplugged into an offline + * state, this function is called before the CPU is set to offline in + * the cpumask, and thus the CPU needs to be explicitly excluded. + */ + + cpumask_t fast_except_me; + + cpumask_and(&fast_except_me, &fast_misaligned_access, cpu_online_mask); + cpumask_clear_cpu(cpu, &fast_except_me); + + modify_unaligned_access_branches(&fast_except_me, num_online_cpus() - 1); +} + +static void set_unaligned_access_static_branches(void) +{ + /* + * This will be called after check_unaligned_access_all_cpus so the + * result of unaligned access speed for all CPUs will be available. + * + * To avoid the number of online cpus changing between reading + * cpu_online_mask and calling num_online_cpus, cpus_read_lock must be + * held before calling this function. 
+ */ + + cpumask_t fast_and_online; + + cpumask_and(&fast_and_online, &fast_misaligned_access, cpu_online_mask); + + modify_unaligned_access_branches(&fast_and_online, num_online_cpus()); +} + +static int lock_and_set_unaligned_access_static_branch(void) +{ + cpus_read_lock(); + set_unaligned_access_static_branches(); + cpus_read_unlock(); + + return 0; +} + +arch_initcall_sync(lock_and_set_unaligned_access_static_branch); + static int riscv_online_cpu(unsigned int cpu) { static struct page *buf; /* We are already set since the last check */ if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN) - return 0; + goto exit; buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); if (!buf) { @@ -812,6 +882,17 @@ static int riscv_online_cpu(unsigned int cpu) check_unaligned_access(buf); __free_pages(buf, MISALIGNED_BUFFER_ORDER); + +exit: + set_unaligned_access_static_branches(); + + return 0; +} + +static int riscv_offline_cpu(unsigned int cpu) +{ + set_unaligned_access_static_branches_except_cpu(cpu); + return 0; } @@ -846,9 +927,12 @@ static int check_unaligned_access_all_cpus(void) /* Check core 0. */ smp_call_on_cpu(0, check_unaligned_access, bufs[0], true); - /* Setup hotplug callback for any new CPUs that come online. */ + /* + * Setup hotplug callbacks for any new CPUs that come online or go + * offline. 
+ */ cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online", - riscv_online_cpu, NULL); + riscv_online_cpu, riscv_offline_cpu); out: unaligned_emulation_finish(); diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 54ca4564a92631388783a7978e8f49f40e556364..9d1a305d55087bb3a6bdc73f8ed8ebe3206775b1 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -83,6 +83,10 @@ SYM_CODE_START(handle_exception) /* Load the kernel shadow call stack pointer if coming from userspace */ scs_load_current_if_task_changed s5 +#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE + move a0, sp + call riscv_v_context_nesting_start +#endif move a0, sp /* pt_regs */ la ra, ret_from_exception @@ -138,6 +142,10 @@ SYM_CODE_START_NOALIGN(ret_from_exception) */ csrw CSR_SCRATCH, tp 1: +#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE + move a0, sp + call riscv_v_context_nesting_end +#endif REG_L a0, PT_STATUS(sp) /* * The current load reservation is effectively part of the processor's diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index 03a6434a8cdd0035bbc59629b1751c00df24b918..f5aa24d9e1c150e651f5eeb144da8671e9ac5ddc 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -178,32 +178,28 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, } #ifdef CONFIG_DYNAMIC_FTRACE +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs) +{ + struct pt_regs *regs = arch_ftrace_get_regs(fregs); + unsigned long *parent = (unsigned long *)®s->ra; + + prepare_ftrace_return(parent, ip, frame_pointer(regs)); +} +#else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ extern void ftrace_graph_call(void); -extern void ftrace_graph_regs_call(void); int ftrace_enable_ftrace_graph_caller(void) { - int ret; - - ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call, - (unsigned long)&prepare_ftrace_return, true, true); - if (ret) - 
return ret; - - return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call, + return __ftrace_modify_call((unsigned long)&ftrace_graph_call, (unsigned long)&prepare_ftrace_return, true, true); } int ftrace_disable_ftrace_graph_caller(void) { - int ret; - - ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call, - (unsigned long)&prepare_ftrace_return, false, true); - if (ret) - return ret; - - return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call, + return __ftrace_modify_call((unsigned long)&ftrace_graph_call, (unsigned long)&prepare_ftrace_return, false, true); } +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/riscv/kernel/kernel_mode_vector.c b/arch/riscv/kernel/kernel_mode_vector.c new file mode 100644 index 0000000000000000000000000000000000000000..6afe80c7f03ab0c195ee43725d6dfd041dc0464b --- /dev/null +++ b/arch/riscv/kernel/kernel_mode_vector.c @@ -0,0 +1,247 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2012 ARM Ltd. + * Author: Catalin Marinas + * Copyright (C) 2017 Linaro Ltd. + * Copyright (C) 2021 SiFive + */ +#include +#include +#include +#include +#include + +#include +#include +#include +#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE +#include +#endif + +static inline void riscv_v_flags_set(u32 flags) +{ + WRITE_ONCE(current->thread.riscv_v_flags, flags); +} + +static inline void riscv_v_start(u32 flags) +{ + int orig; + + orig = riscv_v_flags(); + BUG_ON((orig & flags) != 0); + riscv_v_flags_set(orig | flags); + barrier(); +} + +static inline void riscv_v_stop(u32 flags) +{ + int orig; + + barrier(); + orig = riscv_v_flags(); + BUG_ON((orig & flags) == 0); + riscv_v_flags_set(orig & ~flags); +} + +/* + * Claim ownership of the CPU vector context for use by the calling context. + * + * The caller may freely manipulate the vector context metadata until + * put_cpu_vector_context() is called. 
+ */ +void get_cpu_vector_context(void) +{ + /* + * disable softirqs so it is impossible for softirqs to nest + * get_cpu_vector_context() when kernel is actively using Vector. + */ + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_bh_disable(); + else + preempt_disable(); + + riscv_v_start(RISCV_KERNEL_MODE_V); +} + +/* + * Release the CPU vector context. + * + * Must be called from a context in which get_cpu_vector_context() was + * previously called, with no call to put_cpu_vector_context() in the + * meantime. + */ +void put_cpu_vector_context(void) +{ + riscv_v_stop(RISCV_KERNEL_MODE_V); + + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_bh_enable(); + else + preempt_enable(); +} + +#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE +static __always_inline u32 *riscv_v_flags_ptr(void) +{ + return ¤t->thread.riscv_v_flags; +} + +static inline void riscv_preempt_v_set_dirty(void) +{ + *riscv_v_flags_ptr() |= RISCV_PREEMPT_V_DIRTY; +} + +static inline void riscv_preempt_v_reset_flags(void) +{ + *riscv_v_flags_ptr() &= ~(RISCV_PREEMPT_V_DIRTY | RISCV_PREEMPT_V_NEED_RESTORE); +} + +static inline void riscv_v_ctx_depth_inc(void) +{ + *riscv_v_flags_ptr() += RISCV_V_CTX_UNIT_DEPTH; +} + +static inline void riscv_v_ctx_depth_dec(void) +{ + *riscv_v_flags_ptr() -= RISCV_V_CTX_UNIT_DEPTH; +} + +static inline u32 riscv_v_ctx_get_depth(void) +{ + return *riscv_v_flags_ptr() & RISCV_V_CTX_DEPTH_MASK; +} + +static int riscv_v_stop_kernel_context(void) +{ + if (riscv_v_ctx_get_depth() != 0 || !riscv_preempt_v_started(current)) + return 1; + + riscv_preempt_v_clear_dirty(current); + riscv_v_stop(RISCV_PREEMPT_V); + return 0; +} + +static int riscv_v_start_kernel_context(bool *is_nested) +{ + struct __riscv_v_ext_state *kvstate, *uvstate; + + kvstate = ¤t->thread.kernel_vstate; + if (!kvstate->datap) + return -ENOENT; + + if (riscv_preempt_v_started(current)) { + WARN_ON(riscv_v_ctx_get_depth() == 0); + *is_nested = true; + get_cpu_vector_context(); + if (riscv_preempt_v_dirty(current)) { + 
__riscv_v_vstate_save(kvstate, kvstate->datap); + riscv_preempt_v_clear_dirty(current); + } + riscv_preempt_v_set_restore(current); + return 0; + } + + /* Transfer the ownership of V from user to kernel, then save */ + riscv_v_start(RISCV_PREEMPT_V | RISCV_PREEMPT_V_DIRTY); + if ((task_pt_regs(current)->status & SR_VS) == SR_VS_DIRTY) { + uvstate = ¤t->thread.vstate; + __riscv_v_vstate_save(uvstate, uvstate->datap); + } + riscv_preempt_v_clear_dirty(current); + return 0; +} + +/* low-level V context handling code, called with irq disabled */ +asmlinkage void riscv_v_context_nesting_start(struct pt_regs *regs) +{ + int depth; + + if (!riscv_preempt_v_started(current)) + return; + + depth = riscv_v_ctx_get_depth(); + if (depth == 0 && (regs->status & SR_VS) == SR_VS_DIRTY) + riscv_preempt_v_set_dirty(); + + riscv_v_ctx_depth_inc(); +} + +asmlinkage void riscv_v_context_nesting_end(struct pt_regs *regs) +{ + struct __riscv_v_ext_state *vstate = ¤t->thread.kernel_vstate; + u32 depth; + + WARN_ON(!irqs_disabled()); + + if (!riscv_preempt_v_started(current)) + return; + + riscv_v_ctx_depth_dec(); + depth = riscv_v_ctx_get_depth(); + if (depth == 0) { + if (riscv_preempt_v_restore(current)) { + __riscv_v_vstate_restore(vstate, vstate->datap); + __riscv_v_vstate_clean(regs); + riscv_preempt_v_reset_flags(); + } + } +} +#else +#define riscv_v_start_kernel_context(nested) (-ENOENT) +#define riscv_v_stop_kernel_context() (-ENOENT) +#endif /* CONFIG_RISCV_ISA_V_PREEMPTIVE */ + +/* + * kernel_vector_begin(): obtain the CPU vector registers for use by the calling + * context + * + * Must not be called unless may_use_simd() returns true. + * Task context in the vector registers is saved back to memory as necessary. + * + * A matching call to kernel_vector_end() must be made before returning from the + * calling context. + * + * The caller may freely use the vector registers until kernel_vector_end() is + * called. 
+ */ +void kernel_vector_begin(void) +{ + bool nested = false; + + if (WARN_ON(!has_vector())) + return; + + BUG_ON(!may_use_simd()); + + if (riscv_v_start_kernel_context(&nested)) { + get_cpu_vector_context(); + riscv_v_vstate_save(¤t->thread.vstate, task_pt_regs(current)); + } + + if (!nested) + riscv_v_vstate_set_restore(current, task_pt_regs(current)); + + riscv_v_enable(); +} +EXPORT_SYMBOL_GPL(kernel_vector_begin); + +/* + * kernel_vector_end(): give the CPU vector registers back to the current task + * + * Must be called from a context in which kernel_vector_begin() was previously + * called, with no call to kernel_vector_end() in the meantime. + * + * The caller must not use the vector registers after this function is called, + * unless kernel_vector_begin() is called again in the meantime. + */ +void kernel_vector_end(void) +{ + if (WARN_ON(!has_vector())) + return; + + riscv_v_disable(); + + if (riscv_v_stop_kernel_context()) + put_cpu_vector_context(); +} +EXPORT_SYMBOL_GPL(kernel_vector_end); diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S index 79dc81223238232a453aac10d6a5b87a06df0725..b7561288e8da616de4e2082622207a0f33e9b7f4 100644 --- a/arch/riscv/kernel/mcount-dyn.S +++ b/arch/riscv/kernel/mcount-dyn.S @@ -57,31 +57,150 @@ .endm #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS - .macro SAVE_ALL + +/** +* SAVE_ABI_REGS - save regs against the pt_regs struct +* +* @all: tell if saving all the regs +* +* If all is set, all the regs will be saved, otherwise only ABI +* related regs (a0-a7,epc,ra and optional s0) will be saved. +* +* After the stack is established, +* +* 0(sp) stores the PC of the traced function which can be accessed +* by &(fregs)->regs->epc in tracing function. Note that the real +* function entry address should be computed with -FENTRY_RA_OFFSET. +* +* 8(sp) stores the function return address (i.e. parent IP) that +* can be accessed by &(fregs)->regs->ra in tracing function. 
+* +* The other regs are saved at the respective localtion and accessed +* by the respective pt_regs member. +* +* Here is the layout of stack for your reference. +* +* PT_SIZE_ON_STACK -> +++++++++ +* + ..... + +* + t3-t6 + +* + s2-s11+ +* + a0-a7 + --++++-> ftrace_caller saved +* + s1 + + +* + s0 + --+ +* + t0-t2 + + +* + tp + + +* + gp + + +* + sp + + +* + ra + --+ // parent IP +* sp -> + epc + --+ // PC +* +++++++++ +**/ + .macro SAVE_ABI_REGS, all=0 addi sp, sp, -PT_SIZE_ON_STACK - REG_S t0, PT_EPC(sp) - REG_S x1, PT_RA(sp) - REG_S x2, PT_SP(sp) - REG_S x3, PT_GP(sp) - REG_S x4, PT_TP(sp) - REG_S x5, PT_T0(sp) - save_from_x6_to_x31 + REG_S t0, PT_EPC(sp) + REG_S x1, PT_RA(sp) + + // save the ABI regs + + REG_S x10, PT_A0(sp) + REG_S x11, PT_A1(sp) + REG_S x12, PT_A2(sp) + REG_S x13, PT_A3(sp) + REG_S x14, PT_A4(sp) + REG_S x15, PT_A5(sp) + REG_S x16, PT_A6(sp) + REG_S x17, PT_A7(sp) + + // save the leftover regs + + .if \all == 1 + REG_S x2, PT_SP(sp) + REG_S x3, PT_GP(sp) + REG_S x4, PT_TP(sp) + REG_S x5, PT_T0(sp) + REG_S x6, PT_T1(sp) + REG_S x7, PT_T2(sp) + REG_S x8, PT_S0(sp) + REG_S x9, PT_S1(sp) + REG_S x18, PT_S2(sp) + REG_S x19, PT_S3(sp) + REG_S x20, PT_S4(sp) + REG_S x21, PT_S5(sp) + REG_S x22, PT_S6(sp) + REG_S x23, PT_S7(sp) + REG_S x24, PT_S8(sp) + REG_S x25, PT_S9(sp) + REG_S x26, PT_S10(sp) + REG_S x27, PT_S11(sp) + REG_S x28, PT_T3(sp) + REG_S x29, PT_T4(sp) + REG_S x30, PT_T5(sp) + REG_S x31, PT_T6(sp) + + // save s0 if FP_TEST defined + + .else +#ifdef HAVE_FUNCTION_GRAPH_FP_TEST + REG_S x8, PT_S0(sp) +#endif + .endif .endm - .macro RESTORE_ALL - REG_L x1, PT_RA(sp) - REG_L x2, PT_SP(sp) - REG_L x3, PT_GP(sp) - REG_L x4, PT_TP(sp) - /* Restore t0 with PT_EPC */ - REG_L x5, PT_EPC(sp) - restore_from_x6_to_x31 + .macro RESTORE_ABI_REGS, all=0 + REG_L t0, PT_EPC(sp) + REG_L x1, PT_RA(sp) + REG_L x10, PT_A0(sp) + REG_L x11, PT_A1(sp) + REG_L x12, PT_A2(sp) + REG_L x13, PT_A3(sp) + REG_L x14, PT_A4(sp) + REG_L x15, PT_A5(sp) + REG_L x16, 
PT_A6(sp) + REG_L x17, PT_A7(sp) + .if \all == 1 + REG_L x2, PT_SP(sp) + REG_L x3, PT_GP(sp) + REG_L x4, PT_TP(sp) + REG_L x6, PT_T1(sp) + REG_L x7, PT_T2(sp) + REG_L x8, PT_S0(sp) + REG_L x9, PT_S1(sp) + REG_L x18, PT_S2(sp) + REG_L x19, PT_S3(sp) + REG_L x20, PT_S4(sp) + REG_L x21, PT_S5(sp) + REG_L x22, PT_S6(sp) + REG_L x23, PT_S7(sp) + REG_L x24, PT_S8(sp) + REG_L x25, PT_S9(sp) + REG_L x26, PT_S10(sp) + REG_L x27, PT_S11(sp) + REG_L x28, PT_T3(sp) + REG_L x29, PT_T4(sp) + REG_L x30, PT_T5(sp) + REG_L x31, PT_T6(sp) + + .else +#ifdef HAVE_FUNCTION_GRAPH_FP_TEST + REG_L x8, PT_S0(sp) +#endif + .endif addi sp, sp, PT_SIZE_ON_STACK .endm + + .macro PREPARE_ARGS + addi a0, t0, -FENTRY_RA_OFFSET + la a1, function_trace_op + REG_L a2, 0(a1) + mv a1, ra + mv a3, sp + .endm + #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ +#ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS SYM_FUNC_START(ftrace_caller) SAVE_ABI @@ -105,34 +224,39 @@ SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL) call ftrace_stub #endif RESTORE_ABI - jr t0 + jr t0 SYM_FUNC_END(ftrace_caller) -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +#else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ SYM_FUNC_START(ftrace_regs_caller) - SAVE_ALL - - addi a0, t0, -FENTRY_RA_OFFSET - la a1, function_trace_op - REG_L a2, 0(a1) - mv a1, ra - mv a3, sp + mv t1, zero + SAVE_ABI_REGS 1 + PREPARE_ARGS SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL) call ftrace_stub -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - addi a0, sp, PT_RA - REG_L a1, PT_EPC(sp) - addi a1, a1, -FENTRY_RA_OFFSET -#ifdef HAVE_FUNCTION_GRAPH_FP_TEST - mv a2, s0 -#endif -SYM_INNER_LABEL(ftrace_graph_regs_call, SYM_L_GLOBAL) + RESTORE_ABI_REGS 1 + bnez t1, .Ldirect + jr t0 +.Ldirect: + jr t1 +SYM_FUNC_END(ftrace_regs_caller) + +SYM_FUNC_START(ftrace_caller) + SAVE_ABI_REGS 0 + PREPARE_ARGS + +SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) call ftrace_stub -#endif - RESTORE_ALL - jr t0 -SYM_FUNC_END(ftrace_regs_caller) + RESTORE_ABI_REGS 0 + jr t0 +SYM_FUNC_END(ftrace_caller) #endif /* 
CONFIG_DYNAMIC_FTRACE_WITH_REGS */ + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +SYM_CODE_START(ftrace_stub_direct_tramp) + jr t0 +SYM_CODE_END(ftrace_stub_direct_tramp) +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 862834bb1d64387d161ec0ef6ab0c3debfc9fa1b..5e5a82644451e16d8bdfe229e2ce89b4c389c31e 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -723,8 +723,8 @@ static int add_relocation_to_accumulate(struct module *me, int type, if (!bucket) { kfree(entry); - kfree(rel_head); kfree(rel_head->rel_entry); + kfree(rel_head); return -ENOMEM; } @@ -747,6 +747,10 @@ initialize_relocation_hashtable(unsigned int num_relocations, { /* Can safely assume that bits is not greater than sizeof(long) */ unsigned long hashtable_size = roundup_pow_of_two(num_relocations); + /* + * When hashtable_size == 1, hashtable_bits == 0. + * This is valid because the hashing algorithm returns 0 in this case. 
+ */ unsigned int hashtable_bits = ilog2(hashtable_size); /* @@ -760,10 +764,10 @@ initialize_relocation_hashtable(unsigned int num_relocations, hashtable_size <<= should_double_size; *relocation_hashtable = kmalloc_array(hashtable_size, - sizeof(*relocation_hashtable), + sizeof(**relocation_hashtable), GFP_KERNEL); if (!*relocation_hashtable) - return -ENOMEM; + return 0; __hash_init(*relocation_hashtable, hashtable_size); @@ -779,6 +783,7 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, Elf_Sym *sym; void *location; unsigned int i, type; + unsigned int j_idx = 0; Elf_Addr v; int res; unsigned int num_relocations = sechdrs[relsec].sh_size / sizeof(*rel); @@ -789,8 +794,8 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, hashtable_bits = initialize_relocation_hashtable(num_relocations, &relocation_hashtable); - if (hashtable_bits < 0) - return hashtable_bits; + if (!relocation_hashtable) + return -ENOMEM; INIT_LIST_HEAD(&used_buckets_list); @@ -829,9 +834,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, v = sym->st_value + rel[i].r_addend; if (type == R_RISCV_PCREL_LO12_I || type == R_RISCV_PCREL_LO12_S) { - unsigned int j; + unsigned int j = j_idx; + bool found = false; - for (j = 0; j < sechdrs[relsec].sh_size / sizeof(*rel); j++) { + do { unsigned long hi20_loc = sechdrs[sechdrs[relsec].sh_info].sh_addr + rel[j].r_offset; @@ -860,16 +866,26 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, hi20 = (offset + 0x800) & 0xfffff000; lo12 = offset - hi20; v = lo12; + found = true; break; } - } - if (j == sechdrs[relsec].sh_size / sizeof(*rel)) { + + j++; + if (j > sechdrs[relsec].sh_size / sizeof(*rel)) + j = 0; + + } while (j_idx != j); + + if (!found) { pr_err( "%s: Can not find HI20 relocation information\n", me->name); return -EINVAL; } + + /* Record the previous j-loop end index */ + j_idx = j; } if (reloc_handlers[type].accumulate_handler) diff --git a/arch/riscv/kernel/pi/cmdline_early.c 
b/arch/riscv/kernel/pi/cmdline_early.c index 68e786c84c949b23b9fa529686fc21ce89e94d64..f6d4dedffb8422051a3598ead6cea3d0bac96d7a 100644 --- a/arch/riscv/kernel/pi/cmdline_early.c +++ b/arch/riscv/kernel/pi/cmdline_early.c @@ -38,8 +38,7 @@ static char *get_early_cmdline(uintptr_t dtb_pa) if (IS_ENABLED(CONFIG_CMDLINE_EXTEND) || IS_ENABLED(CONFIG_CMDLINE_FORCE) || fdt_cmdline_size == 0 /* CONFIG_CMDLINE_FALLBACK */) { - strncat(early_cmdline, CONFIG_CMDLINE, - COMMAND_LINE_SIZE - fdt_cmdline_size); + strlcat(early_cmdline, CONFIG_CMDLINE, COMMAND_LINE_SIZE); } return early_cmdline; diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 4f21d970a1292b06be357b8b33ed541751bbb091..92922dbd5b5c1f9b5d57643ecbd7a1599c5ac4c3 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -171,6 +171,7 @@ void flush_thread(void) riscv_v_vstate_off(task_pt_regs(current)); kfree(current->thread.vstate.datap); memset(¤t->thread.vstate, 0, sizeof(struct __riscv_v_ext_state)); + clear_tsk_thread_flag(current, TIF_RISCV_V_DEFER_RESTORE); #endif } @@ -178,7 +179,7 @@ void arch_release_task_struct(struct task_struct *tsk) { /* Free the vector context of datap. 
*/ if (has_vector()) - kfree(tsk->thread.vstate.datap); + riscv_v_thread_free(tsk); } int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) @@ -187,6 +188,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) *dst = *src; /* clear entire V context, including datap for a new task */ memset(&dst->thread.vstate, 0, sizeof(struct __riscv_v_ext_state)); + memset(&dst->thread.kernel_vstate, 0, sizeof(struct __riscv_v_ext_state)); + clear_tsk_thread_flag(dst, TIF_RISCV_V_DEFER_RESTORE); return 0; } @@ -221,7 +224,15 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) childregs->a0 = 0; /* Return value of fork() */ p->thread.s[0] = 0; } + p->thread.riscv_v_flags = 0; + if (has_vector()) + riscv_v_thread_alloc(p); p->thread.ra = (unsigned long)ret_from_fork; p->thread.sp = (unsigned long)childregs; /* kernel sp */ return 0; } + +void __init arch_task_cache_init(void) +{ + riscv_v_setup_ctx_cache(); +} diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c index 2afe460de16a62ba21cf3c7db4a96c5ec8f7d3a5..e8515aa9d80bf82fd6ff2598664b9fe18a6b1de3 100644 --- a/arch/riscv/kernel/ptrace.c +++ b/arch/riscv/kernel/ptrace.c @@ -99,8 +99,11 @@ static int riscv_vr_get(struct task_struct *target, * Ensure the vector registers have been saved to the memory before * copying them to membuf. 
*/ - if (target == current) - riscv_v_vstate_save(current, task_pt_regs(current)); + if (target == current) { + get_cpu_vector_context(); + riscv_v_vstate_save(¤t->thread.vstate, task_pt_regs(current)); + put_cpu_vector_context(); + } ptrace_vstate.vstart = vstate->vstart; ptrace_vstate.vl = vstate->vl; diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c index 5a62ed1da45332c85820fdfdd7e90046b1ae3380..e66e0999a80057058c66c71fa907a0fb0152bc00 100644 --- a/arch/riscv/kernel/sbi.c +++ b/arch/riscv/kernel/sbi.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -571,6 +572,66 @@ long sbi_get_mimpid(void) } EXPORT_SYMBOL_GPL(sbi_get_mimpid); +bool sbi_debug_console_available; + +int sbi_debug_console_write(const char *bytes, unsigned int num_bytes) +{ + phys_addr_t base_addr; + struct sbiret ret; + + if (!sbi_debug_console_available) + return -EOPNOTSUPP; + + if (is_vmalloc_addr(bytes)) + base_addr = page_to_phys(vmalloc_to_page(bytes)) + + offset_in_page(bytes); + else + base_addr = __pa(bytes); + if (PAGE_SIZE < (offset_in_page(bytes) + num_bytes)) + num_bytes = PAGE_SIZE - offset_in_page(bytes); + + if (IS_ENABLED(CONFIG_32BIT)) + ret = sbi_ecall(SBI_EXT_DBCN, SBI_EXT_DBCN_CONSOLE_WRITE, + num_bytes, lower_32_bits(base_addr), + upper_32_bits(base_addr), 0, 0, 0); + else + ret = sbi_ecall(SBI_EXT_DBCN, SBI_EXT_DBCN_CONSOLE_WRITE, + num_bytes, base_addr, 0, 0, 0, 0); + + if (ret.error == SBI_ERR_FAILURE) + return -EIO; + return ret.error ? 
sbi_err_map_linux_errno(ret.error) : ret.value; +} + +int sbi_debug_console_read(char *bytes, unsigned int num_bytes) +{ + phys_addr_t base_addr; + struct sbiret ret; + + if (!sbi_debug_console_available) + return -EOPNOTSUPP; + + if (is_vmalloc_addr(bytes)) + base_addr = page_to_phys(vmalloc_to_page(bytes)) + + offset_in_page(bytes); + else + base_addr = __pa(bytes); + if (PAGE_SIZE < (offset_in_page(bytes) + num_bytes)) + num_bytes = PAGE_SIZE - offset_in_page(bytes); + + if (IS_ENABLED(CONFIG_32BIT)) + ret = sbi_ecall(SBI_EXT_DBCN, SBI_EXT_DBCN_CONSOLE_READ, + num_bytes, lower_32_bits(base_addr), + upper_32_bits(base_addr), 0, 0, 0); + else + ret = sbi_ecall(SBI_EXT_DBCN, SBI_EXT_DBCN_CONSOLE_READ, + num_bytes, base_addr, 0, 0, 0, 0); + + if (ret.error == SBI_ERR_FAILURE) + return -EIO; + return ret.error ? sbi_err_map_linux_errno(ret.error) : ret.value; +} + void __init sbi_init(void) { int ret; @@ -612,6 +673,11 @@ void __init sbi_init(void) sbi_srst_reboot_nb.priority = 192; register_restart_handler(&sbi_srst_reboot_nb); } + if ((sbi_spec_version >= sbi_mk_version(2, 0)) && + (sbi_probe_extension(SBI_EXT_DBCN) > 0)) { + pr_info("SBI DBCN extension detected\n"); + sbi_debug_console_available = true; + } } else { __sbi_set_timer = __sbi_set_timer_v01; __sbi_send_ipi = __sbi_send_ipi_v01; diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index 33dfb507830100c73efd168f244006b90ff7525a..501e66debf69721d53db2515cea4df970a6b2784 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -86,7 +86,10 @@ static long save_v_state(struct pt_regs *regs, void __user **sc_vec) /* datap is designed to be 16 byte aligned for better performance */ WARN_ON(unlikely(!IS_ALIGNED((unsigned long)datap, 16))); - riscv_v_vstate_save(current, regs); + get_cpu_vector_context(); + riscv_v_vstate_save(¤t->thread.vstate, regs); + put_cpu_vector_context(); + /* Copy everything of vstate but datap. 
*/ err = __copy_to_user(&state->v_state, ¤t->thread.vstate, offsetof(struct __riscv_v_ext_state, datap)); @@ -134,7 +137,7 @@ static long __restore_v_state(struct pt_regs *regs, void __user *sc_vec) if (unlikely(err)) return err; - riscv_v_vstate_restore(current, regs); + riscv_v_vstate_set_restore(current, regs); return err; } diff --git a/arch/riscv/kernel/suspend.c b/arch/riscv/kernel/suspend.c index 3c89b8ec69c49cce4809986f51784fcbfff53630..239509367e4233336806c19da964a06537d5a9b5 100644 --- a/arch/riscv/kernel/suspend.c +++ b/arch/riscv/kernel/suspend.c @@ -4,8 +4,12 @@ * Copyright (c) 2022 Ventana Micro Systems Inc. */ +#define pr_fmt(fmt) "suspend: " fmt + #include +#include #include +#include #include void suspend_save_csrs(struct suspend_context *context) @@ -85,3 +89,43 @@ int cpu_suspend(unsigned long arg, return rc; } + +#ifdef CONFIG_RISCV_SBI +static int sbi_system_suspend(unsigned long sleep_type, + unsigned long resume_addr, + unsigned long opaque) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_SUSP, SBI_EXT_SUSP_SYSTEM_SUSPEND, + sleep_type, resume_addr, opaque, 0, 0, 0); + if (ret.error) + return sbi_err_map_linux_errno(ret.error); + + return ret.value; +} + +static int sbi_system_suspend_enter(suspend_state_t state) +{ + return cpu_suspend(SBI_SUSP_SLEEP_TYPE_SUSPEND_TO_RAM, sbi_system_suspend); +} + +static const struct platform_suspend_ops sbi_system_suspend_ops = { + .valid = suspend_valid_only_mem, + .enter = sbi_system_suspend_enter, +}; + +static int __init sbi_system_suspend_init(void) +{ + if (sbi_spec_version >= sbi_mk_version(2, 0) && + sbi_probe_extension(SBI_EXT_SUSP) > 0) { + pr_info("SBI SUSP extension detected\n"); + if (IS_ENABLED(CONFIG_SUSPEND)) + suspend_set_ops(&sbi_system_suspend_ops); + } + + return 0; +} + +arch_initcall(sbi_system_suspend_init); +#endif /* CONFIG_RISCV_SBI */ diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c index 
578b6292487e1bb5e32309ee6874b8ba7a0c8315..6727d1d3b8f282c16a161c96ba898a17db87176e 100644 --- a/arch/riscv/kernel/vector.c +++ b/arch/riscv/kernel/vector.c @@ -21,6 +21,10 @@ #include static bool riscv_v_implicit_uacc = IS_ENABLED(CONFIG_RISCV_ISA_V_DEFAULT_ENABLE); +static struct kmem_cache *riscv_v_user_cachep; +#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE +static struct kmem_cache *riscv_v_kernel_cachep; +#endif unsigned long riscv_v_vsize __read_mostly; EXPORT_SYMBOL_GPL(riscv_v_vsize); @@ -47,6 +51,21 @@ int riscv_v_setup_vsize(void) return 0; } +void __init riscv_v_setup_ctx_cache(void) +{ + if (!has_vector()) + return; + + riscv_v_user_cachep = kmem_cache_create_usercopy("riscv_vector_ctx", + riscv_v_vsize, 16, SLAB_PANIC, + 0, riscv_v_vsize, NULL); +#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE + riscv_v_kernel_cachep = kmem_cache_create("riscv_vector_kctx", + riscv_v_vsize, 16, + SLAB_PANIC, NULL); +#endif +} + static bool insn_is_vector(u32 insn_buf) { u32 opcode = insn_buf & __INSN_OPCODE_MASK; @@ -80,20 +99,37 @@ static bool insn_is_vector(u32 insn_buf) return false; } -static int riscv_v_thread_zalloc(void) +static int riscv_v_thread_zalloc(struct kmem_cache *cache, + struct __riscv_v_ext_state *ctx) { void *datap; - datap = kzalloc(riscv_v_vsize, GFP_KERNEL); + datap = kmem_cache_zalloc(cache, GFP_KERNEL); if (!datap) return -ENOMEM; - current->thread.vstate.datap = datap; - memset(¤t->thread.vstate, 0, offsetof(struct __riscv_v_ext_state, - datap)); + ctx->datap = datap; + memset(ctx, 0, offsetof(struct __riscv_v_ext_state, datap)); return 0; } +void riscv_v_thread_alloc(struct task_struct *tsk) +{ +#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE + riscv_v_thread_zalloc(riscv_v_kernel_cachep, &tsk->thread.kernel_vstate); +#endif +} + +void riscv_v_thread_free(struct task_struct *tsk) +{ + if (tsk->thread.vstate.datap) + kmem_cache_free(riscv_v_user_cachep, tsk->thread.vstate.datap); +#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE + if (tsk->thread.kernel_vstate.datap) + 
kmem_cache_free(riscv_v_kernel_cachep, tsk->thread.kernel_vstate.datap); +#endif +} + #define VSTATE_CTRL_GET_CUR(x) ((x) & PR_RISCV_V_VSTATE_CTRL_CUR_MASK) #define VSTATE_CTRL_GET_NEXT(x) (((x) & PR_RISCV_V_VSTATE_CTRL_NEXT_MASK) >> 2) #define VSTATE_CTRL_MAKE_NEXT(x) (((x) << 2) & PR_RISCV_V_VSTATE_CTRL_NEXT_MASK) @@ -122,7 +158,8 @@ static inline void riscv_v_ctrl_set(struct task_struct *tsk, int cur, int nxt, ctrl |= VSTATE_CTRL_MAKE_NEXT(nxt); if (inherit) ctrl |= PR_RISCV_V_VSTATE_CTRL_INHERIT; - tsk->thread.vstate_ctrl = ctrl; + tsk->thread.vstate_ctrl &= ~PR_RISCV_V_VSTATE_CTRL_MASK; + tsk->thread.vstate_ctrl |= ctrl; } bool riscv_v_vstate_ctrl_user_allowed(void) @@ -162,12 +199,12 @@ bool riscv_v_first_use_handler(struct pt_regs *regs) * context where VS has been off. So, try to allocate the user's V * context and resume execution. */ - if (riscv_v_thread_zalloc()) { + if (riscv_v_thread_zalloc(riscv_v_user_cachep, ¤t->thread.vstate)) { force_sig(SIGBUS); return true; } riscv_v_vstate_on(regs); - riscv_v_vstate_restore(current, regs); + riscv_v_vstate_set_restore(current, regs); return true; } diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile index 26cb2502ecf8969b76a47e874f6be9e90219887b..bd6e6c1b0497b48419bedb70eb451010c6128eb9 100644 --- a/arch/riscv/lib/Makefile +++ b/arch/riscv/lib/Makefile @@ -6,8 +6,14 @@ lib-y += memmove.o lib-y += strcmp.o lib-y += strlen.o lib-y += strncmp.o +lib-y += csum.o +ifeq ($(CONFIG_MMU), y) +lib-$(CONFIG_RISCV_ISA_V) += uaccess_vector.o +endif lib-$(CONFIG_MMU) += uaccess.o lib-$(CONFIG_64BIT) += tishift.o lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o +lib-$(CONFIG_RISCV_ISA_V) += xor.o +lib-$(CONFIG_RISCV_ISA_V) += riscv_v_helpers.o diff --git a/arch/riscv/lib/csum.c b/arch/riscv/lib/csum.c new file mode 100644 index 0000000000000000000000000000000000000000..af3df5274ccbae0118488080040f45881a3e025a --- /dev/null +++ b/arch/riscv/lib/csum.c @@ -0,0 
+1,328 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Checksum library + * + * Influenced by arch/arm64/lib/csum.c + * Copyright (C) 2023 Rivos Inc. + */ +#include +#include +#include +#include +#include + +#include + +#include + +/* Default version is sufficient for 32 bit */ +#ifndef CONFIG_32BIT +__sum16 csum_ipv6_magic(const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u32 len, __u8 proto, __wsum csum) +{ + unsigned int ulen, uproto; + unsigned long sum = (__force unsigned long)csum; + + sum += (__force unsigned long)saddr->s6_addr32[0]; + sum += (__force unsigned long)saddr->s6_addr32[1]; + sum += (__force unsigned long)saddr->s6_addr32[2]; + sum += (__force unsigned long)saddr->s6_addr32[3]; + + sum += (__force unsigned long)daddr->s6_addr32[0]; + sum += (__force unsigned long)daddr->s6_addr32[1]; + sum += (__force unsigned long)daddr->s6_addr32[2]; + sum += (__force unsigned long)daddr->s6_addr32[3]; + + ulen = (__force unsigned int)htonl((unsigned int)len); + sum += ulen; + + uproto = (__force unsigned int)htonl(proto); + sum += uproto; + + /* + * Zbb support saves 4 instructions, so not worth checking without + * alternatives if supported + */ + if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && + IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { + unsigned long fold_temp; + + /* + * Zbb is likely available when the kernel is compiled with Zbb + * support, so nop when Zbb is available and jump when Zbb is + * not available. 
+ */ + asm_volatile_goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0, + RISCV_ISA_EXT_ZBB, 1) + : + : + : + : no_zbb); + asm(".option push \n\ + .option arch,+zbb \n\ + rori %[fold_temp], %[sum], 32 \n\ + add %[sum], %[fold_temp], %[sum] \n\ + srli %[sum], %[sum], 32 \n\ + not %[fold_temp], %[sum] \n\ + roriw %[sum], %[sum], 16 \n\ + subw %[sum], %[fold_temp], %[sum] \n\ + .option pop" + : [sum] "+r" (sum), [fold_temp] "=&r" (fold_temp)); + return (__force __sum16)(sum >> 16); + } +no_zbb: + sum += ror64(sum, 32); + sum >>= 32; + return csum_fold((__force __wsum)sum); +} +EXPORT_SYMBOL(csum_ipv6_magic); +#endif /* !CONFIG_32BIT */ + +#ifdef CONFIG_32BIT +#define OFFSET_MASK 3 +#elif CONFIG_64BIT +#define OFFSET_MASK 7 +#endif + +static inline __no_sanitize_address unsigned long +do_csum_common(const unsigned long *ptr, const unsigned long *end, + unsigned long data) +{ + unsigned int shift; + unsigned long csum = 0, carry = 0; + + /* + * Do 32-bit reads on RV32 and 64-bit reads otherwise. This should be + * faster than doing 32-bit reads on architectures that support larger + * reads. + */ + while (ptr < end) { + csum += data; + carry += csum < data; + data = *(ptr++); + } + + /* + * Perform alignment (and over-read) bytes on the tail if any bytes + * leftover. + */ + shift = ((long)ptr - (long)end) * 8; +#ifdef __LITTLE_ENDIAN + data = (data << shift) >> shift; +#else + data = (data >> shift) << shift; +#endif + csum += data; + carry += csum < data; + csum += carry; + csum += csum < carry; + + return csum; +} + +/* + * Algorithm accounts for buff being misaligned. + * If buff is not aligned, will over-read bytes but not use the bytes that it + * shouldn't. The same thing will occur on the tail-end of the read. 
+ */ +static inline __no_sanitize_address unsigned int +do_csum_with_alignment(const unsigned char *buff, int len) +{ + unsigned int offset, shift; + unsigned long csum, data; + const unsigned long *ptr, *end; + + /* + * Align address to closest word (double word on rv64) that comes before + * buff. This should always be in the same page and cache line. + * Directly call KASAN with the alignment we will be using. + */ + offset = (unsigned long)buff & OFFSET_MASK; + kasan_check_read(buff, len); + ptr = (const unsigned long *)(buff - offset); + + /* + * Clear the most significant bytes that were over-read if buff was not + * aligned. + */ + shift = offset * 8; + data = *(ptr++); +#ifdef __LITTLE_ENDIAN + data = (data >> shift) << shift; +#else + data = (data << shift) >> shift; +#endif + end = (const unsigned long *)(buff + len); + csum = do_csum_common(ptr, end, data); + +#ifdef CC_HAS_ASM_GOTO_TIED_OUTPUT + /* + * Zbb support saves 6 instructions, so not worth checking without + * alternatives if supported + */ + if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && + IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { + unsigned long fold_temp; + + /* + * Zbb is likely available when the kernel is compiled with Zbb + * support, so nop when Zbb is available and jump when Zbb is + * not available. 
+ */ + asm_volatile_goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0, + RISCV_ISA_EXT_ZBB, 1) + : + : + : + : no_zbb); + +#ifdef CONFIG_32BIT + asm_volatile_goto(".option push \n\ + .option arch,+zbb \n\ + rori %[fold_temp], %[csum], 16 \n\ + andi %[offset], %[offset], 1 \n\ + add %[csum], %[fold_temp], %[csum] \n\ + beq %[offset], zero, %l[end] \n\ + rev8 %[csum], %[csum] \n\ + .option pop" + : [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp) + : [offset] "r" (offset) + : + : end); + + return (unsigned short)csum; +#else /* !CONFIG_32BIT */ + asm_volatile_goto(".option push \n\ + .option arch,+zbb \n\ + rori %[fold_temp], %[csum], 32 \n\ + add %[csum], %[fold_temp], %[csum] \n\ + srli %[csum], %[csum], 32 \n\ + roriw %[fold_temp], %[csum], 16 \n\ + addw %[csum], %[fold_temp], %[csum] \n\ + andi %[offset], %[offset], 1 \n\ + beq %[offset], zero, %l[end] \n\ + rev8 %[csum], %[csum] \n\ + .option pop" + : [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp) + : [offset] "r" (offset) + : + : end); + + return (csum << 16) >> 48; +#endif /* !CONFIG_32BIT */ +end: + return csum >> 16; + } +no_zbb: +#endif /* CC_HAS_ASM_GOTO_TIED_OUTPUT */ +#ifndef CONFIG_32BIT + csum += ror64(csum, 32); + csum >>= 32; +#endif + csum = (u32)csum + ror32((u32)csum, 16); + if (offset & 1) + return (u16)swab32(csum); + return csum >> 16; +} + +/* + * Does not perform alignment, should only be used if machine has fast + * misaligned accesses, or when buff is known to be aligned. 
+ */ +static inline __no_sanitize_address unsigned int +do_csum_no_alignment(const unsigned char *buff, int len) +{ + unsigned long csum, data; + const unsigned long *ptr, *end; + + ptr = (const unsigned long *)(buff); + data = *(ptr++); + + kasan_check_read(buff, len); + + end = (const unsigned long *)(buff + len); + csum = do_csum_common(ptr, end, data); + + /* + * Zbb support saves 6 instructions, so not worth checking without + * alternatives if supported + */ + if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && + IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { + unsigned long fold_temp; + + /* + * Zbb is likely available when the kernel is compiled with Zbb + * support, so nop when Zbb is available and jump when Zbb is + * not available. + */ + asm_volatile_goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0, + RISCV_ISA_EXT_ZBB, 1) + : + : + : + : no_zbb); + +#ifdef CONFIG_32BIT + asm (".option push \n\ + .option arch,+zbb \n\ + rori %[fold_temp], %[csum], 16 \n\ + add %[csum], %[fold_temp], %[csum] \n\ + .option pop" + : [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp) + : + : ); + +#else /* !CONFIG_32BIT */ + asm (".option push \n\ + .option arch,+zbb \n\ + rori %[fold_temp], %[csum], 32 \n\ + add %[csum], %[fold_temp], %[csum] \n\ + srli %[csum], %[csum], 32 \n\ + roriw %[fold_temp], %[csum], 16 \n\ + addw %[csum], %[fold_temp], %[csum] \n\ + .option pop" + : [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp) + : + : ); +#endif /* !CONFIG_32BIT */ + return csum >> 16; + } +no_zbb: +#ifndef CONFIG_32BIT + csum += ror64(csum, 32); + csum >>= 32; +#endif + csum = (u32)csum + ror32((u32)csum, 16); + return csum >> 16; +} + +/* + * Perform a checksum on an arbitrary memory address. + * Will do a light-weight address alignment if buff is misaligned, unless + * cpu supports fast misaligned accesses. 
+ */ +unsigned int do_csum(const unsigned char *buff, int len) +{ + if (unlikely(len <= 0)) + return 0; + + /* + * Significant performance gains can be seen by not doing alignment + * on machines with fast misaligned accesses. + * + * There is some duplicate code between the "with_alignment" and + * "no_alignment" implementations, but the overlap is too awkward to be + * able to fit in one function without introducing multiple static + * branches. The largest chunk of overlap was delegated into the + * do_csum_common function. + */ + if (static_branch_likely(&fast_misaligned_access_speed_key)) + return do_csum_no_alignment(buff, len); + + if (((unsigned long)buff & OFFSET_MASK) == 0) + return do_csum_no_alignment(buff, len); + + return do_csum_with_alignment(buff, len); +} diff --git a/arch/riscv/lib/riscv_v_helpers.c b/arch/riscv/lib/riscv_v_helpers.c new file mode 100644 index 0000000000000000000000000000000000000000..be38a93cedaec5dafcd15d2f855df11474d2f383 --- /dev/null +++ b/arch/riscv/lib/riscv_v_helpers.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2023 SiFive + * Author: Andy Chiu + */ +#include +#include + +#include +#include + +#ifdef CONFIG_MMU +#include +#endif + +#ifdef CONFIG_MMU +size_t riscv_v_usercopy_threshold = CONFIG_RISCV_ISA_V_UCOPY_THRESHOLD; +int __asm_vector_usercopy(void *dst, void *src, size_t n); +int fallback_scalar_usercopy(void *dst, void *src, size_t n); +asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n) +{ + size_t remain, copied; + + /* skip has_vector() check because it has been done by the asm */ + if (!may_use_simd()) + goto fallback; + + kernel_vector_begin(); + remain = __asm_vector_usercopy(dst, src, n); + kernel_vector_end(); + + if (remain) { + copied = n - remain; + dst += copied; + src += copied; + n = remain; + goto fallback; + } + + return remain; + +fallback: + return fallback_scalar_usercopy(dst, src, n); +} +#endif diff --git a/arch/riscv/lib/uaccess.S 
b/arch/riscv/lib/uaccess.S index a9d356d6c03cda8350717c7bb1d81cdae88d6362..bc22c078aba81a8170506eddb8642d3353ac461c 100644 --- a/arch/riscv/lib/uaccess.S +++ b/arch/riscv/lib/uaccess.S @@ -3,6 +3,8 @@ #include #include #include +#include +#include .macro fixup op reg addr lbl 100: @@ -11,6 +13,13 @@ .endm SYM_FUNC_START(__asm_copy_to_user) +#ifdef CONFIG_RISCV_ISA_V + ALTERNATIVE("j fallback_scalar_usercopy", "nop", 0, RISCV_ISA_EXT_v, CONFIG_RISCV_ISA_V) + REG_L t0, riscv_v_usercopy_threshold + bltu a2, t0, fallback_scalar_usercopy + tail enter_vector_usercopy +#endif +SYM_FUNC_START(fallback_scalar_usercopy) /* Enable access to user memory */ li t6, SR_SUM @@ -181,6 +190,7 @@ SYM_FUNC_START(__asm_copy_to_user) sub a0, t5, a0 ret SYM_FUNC_END(__asm_copy_to_user) +SYM_FUNC_END(fallback_scalar_usercopy) EXPORT_SYMBOL(__asm_copy_to_user) SYM_FUNC_ALIAS(__asm_copy_from_user, __asm_copy_to_user) EXPORT_SYMBOL(__asm_copy_from_user) diff --git a/arch/riscv/lib/uaccess_vector.S b/arch/riscv/lib/uaccess_vector.S new file mode 100644 index 0000000000000000000000000000000000000000..51ab5588e9ff36b8b7dc80096be587d09da2881f --- /dev/null +++ b/arch/riscv/lib/uaccess_vector.S @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include +#include +#include + +#define pDst a0 +#define pSrc a1 +#define iNum a2 + +#define iVL a3 + +#define ELEM_LMUL_SETTING m8 +#define vData v0 + + .macro fixup op reg addr lbl +100: + \op \reg, \addr + _asm_extable 100b, \lbl + .endm + +SYM_FUNC_START(__asm_vector_usercopy) + /* Enable access to user memory */ + li t6, SR_SUM + csrs CSR_STATUS, t6 + +loop: + vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma + fixup vle8.v vData, (pSrc), 10f + sub iNum, iNum, iVL + add pSrc, pSrc, iVL + fixup vse8.v vData, (pDst), 11f + add pDst, pDst, iVL + bnez iNum, loop + + /* Exception fixup for vector load is shared with normal exit */ +10: + /* Disable access to user memory */ + csrc CSR_STATUS, t6 + mv a0, iNum + ret + + /* 
Exception fixup code for vector store. */ +11: + /* Undo the subtraction after vle8.v */ + add iNum, iNum, iVL + /* Make sure the scalar fallback skip already processed bytes */ + csrr t2, CSR_VSTART + sub iNum, iNum, t2 + j 10b +SYM_FUNC_END(__asm_vector_usercopy) diff --git a/arch/riscv/lib/xor.S b/arch/riscv/lib/xor.S new file mode 100644 index 0000000000000000000000000000000000000000..b28f2430e52fa50c199cd80153647fde33088e4c --- /dev/null +++ b/arch/riscv/lib/xor.S @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2021 SiFive + */ +#include +#include +#include + +SYM_FUNC_START(xor_regs_2_) + vsetvli a3, a0, e8, m8, ta, ma + vle8.v v0, (a1) + vle8.v v8, (a2) + sub a0, a0, a3 + vxor.vv v16, v0, v8 + add a2, a2, a3 + vse8.v v16, (a1) + add a1, a1, a3 + bnez a0, xor_regs_2_ + ret +SYM_FUNC_END(xor_regs_2_) +EXPORT_SYMBOL(xor_regs_2_) + +SYM_FUNC_START(xor_regs_3_) + vsetvli a4, a0, e8, m8, ta, ma + vle8.v v0, (a1) + vle8.v v8, (a2) + sub a0, a0, a4 + vxor.vv v0, v0, v8 + vle8.v v16, (a3) + add a2, a2, a4 + vxor.vv v16, v0, v16 + add a3, a3, a4 + vse8.v v16, (a1) + add a1, a1, a4 + bnez a0, xor_regs_3_ + ret +SYM_FUNC_END(xor_regs_3_) +EXPORT_SYMBOL(xor_regs_3_) + +SYM_FUNC_START(xor_regs_4_) + vsetvli a5, a0, e8, m8, ta, ma + vle8.v v0, (a1) + vle8.v v8, (a2) + sub a0, a0, a5 + vxor.vv v0, v0, v8 + vle8.v v16, (a3) + add a2, a2, a5 + vxor.vv v0, v0, v16 + vle8.v v24, (a4) + add a3, a3, a5 + vxor.vv v16, v0, v24 + add a4, a4, a5 + vse8.v v16, (a1) + add a1, a1, a5 + bnez a0, xor_regs_4_ + ret +SYM_FUNC_END(xor_regs_4_) +EXPORT_SYMBOL(xor_regs_4_) + +SYM_FUNC_START(xor_regs_5_) + vsetvli a6, a0, e8, m8, ta, ma + vle8.v v0, (a1) + vle8.v v8, (a2) + sub a0, a0, a6 + vxor.vv v0, v0, v8 + vle8.v v16, (a3) + add a2, a2, a6 + vxor.vv v0, v0, v16 + vle8.v v24, (a4) + add a3, a3, a6 + vxor.vv v0, v0, v24 + vle8.v v8, (a5) + add a4, a4, a6 + vxor.vv v16, v0, v8 + add a5, a5, a6 + vse8.v v16, (a1) + add a1, a1, a6 + bnez a0, xor_regs_5_ + 
ret +SYM_FUNC_END(xor_regs_5_) +EXPORT_SYMBOL(xor_regs_5_) diff --git a/arch/riscv/mm/dma-noncoherent.c b/arch/riscv/mm/dma-noncoherent.c index 4e4e469b8dd66cfdf3e24346a514db2d3dd55773..843107f834b231a032c6853b5d58382ed6165a37 100644 --- a/arch/riscv/mm/dma-noncoherent.c +++ b/arch/riscv/mm/dma-noncoherent.c @@ -129,7 +129,7 @@ void arch_dma_prep_coherent(struct page *page, size_t size) } void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - const struct iommu_ops *iommu, bool coherent) + bool coherent) { WARN_TAINT(!coherent && riscv_cbom_block_size > ARCH_DMA_MINALIGN, TAINT_CPU_OUT_OF_SPEC, diff --git a/arch/riscv/mm/extable.c b/arch/riscv/mm/extable.c index 35484d830fd6d7fe0a2a521bc736b1c5883afa51..dd1530af3ef15bf74cec58b5a4394918f6a8beb0 100644 --- a/arch/riscv/mm/extable.c +++ b/arch/riscv/mm/extable.c @@ -27,6 +27,14 @@ static bool ex_handler_fixup(const struct exception_table_entry *ex, return true; } +static inline unsigned long regs_get_gpr(struct pt_regs *regs, unsigned int offset) +{ + if (unlikely(!offset || offset > MAX_REG_OFFSET)) + return 0; + + return *(unsigned long *)((unsigned long)regs + offset); +} + static inline void regs_set_gpr(struct pt_regs *regs, unsigned int offset, unsigned long val) { @@ -50,6 +58,27 @@ static bool ex_handler_uaccess_err_zero(const struct exception_table_entry *ex, return true; } +static bool +ex_handler_load_unaligned_zeropad(const struct exception_table_entry *ex, + struct pt_regs *regs) +{ + int reg_data = FIELD_GET(EX_DATA_REG_DATA, ex->data); + int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->data); + unsigned long data, addr, offset; + + addr = regs_get_gpr(regs, reg_addr * sizeof(unsigned long)); + + offset = addr & 0x7UL; + addr &= ~0x7UL; + + data = *(unsigned long *)addr >> (offset * 8); + + regs_set_gpr(regs, reg_data * sizeof(unsigned long), data); + + regs->epc = get_ex_fixup(ex); + return true; +} + bool fixup_exception(struct pt_regs *regs) { const struct exception_table_entry *ex; @@ 
-65,6 +94,8 @@ bool fixup_exception(struct pt_regs *regs) return ex_handler_bpf(ex, regs); case EX_TYPE_UACCESS_ERR_ZERO: return ex_handler_uaccess_err_zero(ex, regs); + case EX_TYPE_LOAD_UNALIGNED_ZEROPAD: + return ex_handler_load_unaligned_zeropad(ex, regs); } BUG(); diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index a65937336cdc8840f6f8997f6a320cf97cf31577..32cad6a65ccd23431d63097a0906ca5b8de485f8 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -1060,7 +1060,11 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) kernel_map.virt_addr = KERNEL_LINK_ADDR + kernel_map.virt_offset; #ifdef CONFIG_XIP_KERNEL +#ifdef CONFIG_64BIT kernel_map.page_offset = PAGE_OFFSET_L3; +#else + kernel_map.page_offset = _AC(CONFIG_PAGE_OFFSET, UL); +#endif kernel_map.xiprom = (uintptr_t)CONFIG_XIP_PHYS_ADDR; kernel_map.xiprom_sz = (uintptr_t)(&_exiprom) - (uintptr_t)(&_xiprom); @@ -1387,10 +1391,29 @@ void __init misc_mem_init(void) } #ifdef CONFIG_SPARSEMEM_VMEMMAP +void __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node, + unsigned long addr, unsigned long next) +{ + pmd_set_huge(pmd, virt_to_phys(p), PAGE_KERNEL); +} + +int __meminit vmemmap_check_pmd(pmd_t *pmdp, int node, + unsigned long addr, unsigned long next) +{ + vmemmap_verify((pte_t *)pmdp, node, addr, next); + return 1; +} + int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap) { - return vmemmap_populate_basepages(start, end, node, NULL); + /* + * Note that SPARSEMEM_VMEMMAP is only selected for rv64 and that we + * can't use hugepage mappings for 2-level page table because in case of + * memory hotplug, we are not able to update all the page tables with + * the new PMDs. 
+ */ + return vmemmap_populate_hugepages(start, end, node, NULL); } #endif diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c index e962518530373da4492d6a9b5f27062232b61ece..c301c8d291d2df54f93b5579c3e56ed1461e4dc7 100644 --- a/arch/riscv/mm/kasan_init.c +++ b/arch/riscv/mm/kasan_init.c @@ -441,6 +441,14 @@ static void __init kasan_shallow_populate(void *start, void *end) kasan_shallow_populate_pgd(vaddr, vend); } +#ifdef CONFIG_KASAN_VMALLOC +void __init kasan_populate_early_vm_area_shadow(void *start, unsigned long size) +{ + kasan_populate(kasan_mem_to_shadow(start), + kasan_mem_to_shadow(start + size)); +} +#endif + static void __init create_tmp_mapping(void) { void *ptr; diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index e6659d7368b35403d1b91739080496bfc45442af..8d12b26f5ac37b659687981c2046f3d5b590753c 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -66,6 +66,11 @@ static inline void local_flush_tlb_range_asid(unsigned long start, local_flush_tlb_range_threshold_asid(start, size, stride, asid); } +void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + local_flush_tlb_range_asid(start, end - start, PAGE_SIZE, FLUSH_TLB_NO_ASID); +} + static void __ipi_flush_tlb_all(void *info) { local_flush_tlb_all(); @@ -93,29 +98,23 @@ static void __ipi_flush_tlb_range_asid(void *info) local_flush_tlb_range_asid(d->start, d->size, d->stride, d->asid); } -static void __flush_tlb_range(struct mm_struct *mm, unsigned long start, - unsigned long size, unsigned long stride) +static void __flush_tlb_range(struct cpumask *cmask, unsigned long asid, + unsigned long start, unsigned long size, + unsigned long stride) { struct flush_tlb_range_data ftd; - const struct cpumask *cmask; - unsigned long asid = FLUSH_TLB_NO_ASID; bool broadcast; - if (mm) { - unsigned int cpuid; + if (cpumask_empty(cmask)) + return; - cmask = mm_cpumask(mm); - if (cpumask_empty(cmask)) - return; + if (cmask != cpu_online_mask) { + 
unsigned int cpuid; cpuid = get_cpu(); /* check if the tlbflush needs to be sent to other CPUs */ broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids; - - if (static_branch_unlikely(&use_asid_allocator)) - asid = atomic_long_read(&mm->context.id) & asid_mask; } else { - cmask = cpu_online_mask; broadcast = true; } @@ -135,25 +134,34 @@ static void __flush_tlb_range(struct mm_struct *mm, unsigned long start, local_flush_tlb_range_asid(start, size, stride, asid); } - if (mm) + if (cmask != cpu_online_mask) put_cpu(); } +static inline unsigned long get_mm_asid(struct mm_struct *mm) +{ + return static_branch_unlikely(&use_asid_allocator) ? + atomic_long_read(&mm->context.id) & asid_mask : FLUSH_TLB_NO_ASID; +} + void flush_tlb_mm(struct mm_struct *mm) { - __flush_tlb_range(mm, 0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE); + __flush_tlb_range(mm_cpumask(mm), get_mm_asid(mm), + 0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE); } void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, unsigned long end, unsigned int page_size) { - __flush_tlb_range(mm, start, end - start, page_size); + __flush_tlb_range(mm_cpumask(mm), get_mm_asid(mm), + start, end - start, page_size); } void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) { - __flush_tlb_range(vma->vm_mm, addr, PAGE_SIZE, PAGE_SIZE); + __flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm), + addr, PAGE_SIZE, PAGE_SIZE); } void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, @@ -185,18 +193,44 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, } } - __flush_tlb_range(vma->vm_mm, start, end - start, stride_size); + __flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm), + start, end - start, stride_size); } void flush_tlb_kernel_range(unsigned long start, unsigned long end) { - __flush_tlb_range(NULL, start, end - start, PAGE_SIZE); + __flush_tlb_range((struct cpumask *)cpu_online_mask, FLUSH_TLB_NO_ASID, + start, end - start, PAGE_SIZE); } #ifdef 
CONFIG_TRANSPARENT_HUGEPAGE void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - __flush_tlb_range(vma->vm_mm, start, end - start, PMD_SIZE); + __flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm), + start, end - start, PMD_SIZE); } #endif + +bool arch_tlbbatch_should_defer(struct mm_struct *mm) +{ + return true; +} + +void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch, + struct mm_struct *mm, + unsigned long uaddr) +{ + cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm)); +} + +void arch_flush_tlb_batched_pending(struct mm_struct *mm) +{ + flush_tlb_mm(mm); +} + +void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) +{ + __flush_tlb_range(&batch->cpumask, FLUSH_TLB_NO_ASID, 0, + FLUSH_TLB_MAX_SIZE, PAGE_SIZE); +} diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 8f39f04247966d0b492ed378438b33d20474e99c..fe565f3a3a917d0da83dbd8329a503910fa41948 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -216,7 +216,6 @@ config S390 select HAVE_VIRT_CPU_ACCOUNTING_IDLE select IOMMU_HELPER if PCI select IOMMU_SUPPORT if PCI - select KEXEC select MMU_GATHER_MERGE_VMAS select MMU_GATHER_NO_GATHER select MMU_GATHER_RCU_TABLE_FREE @@ -443,7 +442,7 @@ config COMMAND_LINE_SIZE line. config COMPAT - def_bool y + def_bool n prompt "Kernel support for 31 bit emulation" select ARCH_WANT_OLD_COMPAT_IPC select COMPAT_OLD_SIGACTION @@ -454,7 +453,9 @@ config COMPAT Select this option if you want to enable your system kernel to handle system-calls from ELF binaries for 31 bit ESA. This option (and some other stuff like libraries and such) is needed for - executing 31 bit applications. It is safe to say "Y". + executing 31 bit applications. + + If unsure say N. 
config SMP def_bool y diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 85490d9373fc1446490945d7718770f3874d8736..cae2dd34fbb49d16ee020e72fb669010dca832f8 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -40,6 +40,7 @@ CONFIG_SCHED_AUTOGROUP=y CONFIG_EXPERT=y # CONFIG_SYSFS_SYSCALL is not set CONFIG_PROFILING=y +CONFIG_KEXEC=y CONFIG_KEXEC_FILE=y CONFIG_KEXEC_SIG=y CONFIG_CRASH_DUMP=y @@ -636,8 +637,9 @@ CONFIG_FUSE_FS=y CONFIG_CUSE=m CONFIG_VIRTIO_FS=m CONFIG_OVERLAY_FS=m +CONFIG_NETFS_SUPPORT=m CONFIG_NETFS_STATS=y -CONFIG_FSCACHE=m +CONFIG_FSCACHE=y CONFIG_CACHEFILES=m CONFIG_ISO9660_FS=y CONFIG_JOLIET=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index fb690fbbf54befbf06dc89fde45590e50788be9e..42b988873e5443df15b054d78610697fdf769293 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -38,6 +38,7 @@ CONFIG_SCHED_AUTOGROUP=y CONFIG_EXPERT=y # CONFIG_SYSFS_SYSCALL is not set CONFIG_PROFILING=y +CONFIG_KEXEC=y CONFIG_KEXEC_FILE=y CONFIG_KEXEC_SIG=y CONFIG_CRASH_DUMP=y @@ -621,8 +622,9 @@ CONFIG_FUSE_FS=y CONFIG_CUSE=m CONFIG_VIRTIO_FS=m CONFIG_OVERLAY_FS=m +CONFIG_NETFS_SUPPORT=m CONFIG_NETFS_STATS=y -CONFIG_FSCACHE=m +CONFIG_FSCACHE=y CONFIG_CACHEFILES=m CONFIG_ISO9660_FS=y CONFIG_JOLIET=y diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 47028450eee157b5b100e82b590dde720555c173..30d2a16876650e9c3ea32997f771131e6372e2fc 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -10,7 +10,6 @@ CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_CRASH_DUMP=y CONFIG_MARCH_Z13=y -# CONFIG_COMPAT is not set CONFIG_NR_CPUS=2 CONFIG_HZ_100=y # CONFIG_CHSC_SCH is not set diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h index 287bb88f76986e127388efd03c18d117bf4c417e..2686bee800e3d5a35f2d4918aac38f4020e0889c 100644 --- 
a/arch/s390/include/asm/pci_io.h +++ b/arch/s390/include/asm/pci_io.h @@ -11,6 +11,8 @@ /* I/O size constraints */ #define ZPCI_MAX_READ_SIZE 8 #define ZPCI_MAX_WRITE_SIZE 128 +#define ZPCI_BOUNDARY_SIZE (1 << 12) +#define ZPCI_BOUNDARY_MASK (ZPCI_BOUNDARY_SIZE - 1) /* I/O Map */ #define ZPCI_IOMAP_SHIFT 48 @@ -125,16 +127,18 @@ out: int zpci_write_block(volatile void __iomem *dst, const void *src, unsigned long len); -static inline u8 zpci_get_max_write_size(u64 src, u64 dst, int len, int max) +static inline int zpci_get_max_io_size(u64 src, u64 dst, int len, int max) { - int count = len > max ? max : len, size = 1; + int offset = dst & ZPCI_BOUNDARY_MASK; + int size; - while (!(src & 0x1) && !(dst & 0x1) && ((size << 1) <= count)) { - dst = dst >> 1; - src = src >> 1; - size = size << 1; - } - return size; + size = min3(len, ZPCI_BOUNDARY_SIZE - offset, max); + if (IS_ALIGNED(src, 8) && IS_ALIGNED(dst, 8) && IS_ALIGNED(size, 8)) + return size; + + if (size >= 8) + return 8; + return rounddown_pow_of_two(size); } static inline int zpci_memcpy_fromio(void *dst, @@ -144,9 +148,9 @@ static inline int zpci_memcpy_fromio(void *dst, int size, rc = 0; while (n > 0) { - size = zpci_get_max_write_size((u64 __force) src, - (u64) dst, n, - ZPCI_MAX_READ_SIZE); + size = zpci_get_max_io_size((u64 __force) src, + (u64) dst, n, + ZPCI_MAX_READ_SIZE); rc = zpci_read_single(dst, src, size); if (rc) break; @@ -166,9 +170,9 @@ static inline int zpci_memcpy_toio(volatile void __iomem *dst, return -EINVAL; while (n > 0) { - size = zpci_get_max_write_size((u64 __force) dst, - (u64) src, n, - ZPCI_MAX_WRITE_SIZE); + size = zpci_get_max_io_size((u64 __force) dst, + (u64) src, n, + ZPCI_MAX_WRITE_SIZE); if (size > 8) /* main path */ rc = zpci_write_block(dst, src, size); else diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c index 9e7c15fccfea92232e8370586071b71a5bf59500..a4f3449cc814162b9972faa67e33481b8a42a65f 100644 --- a/arch/s390/kernel/fpu.c +++ b/arch/s390/kernel/fpu.c 
@@ -208,7 +208,6 @@ void __load_fpu_regs(void) } clear_cpu_flag(CIF_FPU); } -EXPORT_SYMBOL(__load_fpu_regs); void load_fpu_regs(void) { diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c index 39a91b00438a7f6ba48fb541d8f24b51070391d2..bf8a672b15a41afd3a9e0384ad19f83eb8eadba1 100644 --- a/arch/s390/kernel/perf_pai_crypto.c +++ b/arch/s390/kernel/perf_pai_crypto.c @@ -111,11 +111,11 @@ static void paicrypt_event_destroy(struct perf_event *event) mutex_unlock(&pai_reserve_mutex); } -static u64 paicrypt_getctr(struct paicrypt_map *cpump, int nr, bool kernel) +static u64 paicrypt_getctr(unsigned long *page, int nr, bool kernel) { if (kernel) nr += PAI_CRYPTO_MAXCTR; - return cpump->page[nr]; + return page[nr]; } /* Read the counter values. Return value from location in CMP. For event @@ -129,13 +129,13 @@ static u64 paicrypt_getdata(struct perf_event *event, bool kernel) int i; if (event->attr.config != PAI_CRYPTO_BASE) { - return paicrypt_getctr(cpump, + return paicrypt_getctr(cpump->page, event->attr.config - PAI_CRYPTO_BASE, kernel); } for (i = 1; i <= paicrypt_cnt; i++) { - u64 val = paicrypt_getctr(cpump, i, kernel); + u64 val = paicrypt_getctr(cpump->page, i, kernel); if (!val) continue; @@ -317,10 +317,14 @@ static void paicrypt_start(struct perf_event *event, int flags) * Events are added, deleted and re-added when 2 or more events * are active at the same time. 
*/ - if (!event->hw.last_tag) { - event->hw.last_tag = 1; - sum = paicrypt_getall(event); /* Get current value */ - local64_set(&event->hw.prev_count, sum); + if (!event->attr.sample_period) { /* Counting */ + if (!event->hw.last_tag) { + event->hw.last_tag = 1; + sum = paicrypt_getall(event); /* Get current value */ + local64_set(&event->hw.prev_count, sum); + } + } else { /* Sampling */ + perf_sched_cb_inc(event->pmu); } } @@ -336,19 +340,18 @@ static int paicrypt_add(struct perf_event *event, int flags) local_ctl_set_bit(0, CR0_CRYPTOGRAPHY_COUNTER_BIT); } cpump->event = event; - if (flags & PERF_EF_START && !event->attr.sample_period) { - /* Only counting needs initial counter value */ + if (flags & PERF_EF_START) paicrypt_start(event, PERF_EF_RELOAD); - } event->hw.state = 0; - if (event->attr.sample_period) - perf_sched_cb_inc(event->pmu); return 0; } static void paicrypt_stop(struct perf_event *event, int flags) { - paicrypt_read(event); + if (!event->attr.sample_period) /* Counting */ + paicrypt_read(event); + else /* Sampling */ + perf_sched_cb_dec(event->pmu); event->hw.state = PERF_HES_STOPPED; } @@ -357,11 +360,7 @@ static void paicrypt_del(struct perf_event *event, int flags) struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr); struct paicrypt_map *cpump = mp->mapptr; - if (event->attr.sample_period) - perf_sched_cb_dec(event->pmu); - if (!event->attr.sample_period) - /* Only counting needs to read counter */ - paicrypt_stop(event, PERF_EF_UPDATE); + paicrypt_stop(event, PERF_EF_UPDATE); if (--cpump->active_events == 0) { local_ctl_clear_bit(0, CR0_CRYPTOGRAPHY_COUNTER_BIT); WRITE_ONCE(S390_lowcore.ccd, 0); @@ -373,8 +372,7 @@ static void paicrypt_del(struct perf_event *event, int flags) * 2 bytes: Number of counter * 8 bytes: Value of counter */ -static size_t paicrypt_copy(struct pai_userdata *userdata, - struct paicrypt_map *cpump, +static size_t paicrypt_copy(struct pai_userdata *userdata, unsigned long *page, bool exclude_user, bool 
exclude_kernel) { int i, outidx = 0; @@ -383,9 +381,9 @@ static size_t paicrypt_copy(struct pai_userdata *userdata, u64 val = 0; if (!exclude_kernel) - val += paicrypt_getctr(cpump, i, true); + val += paicrypt_getctr(page, i, true); if (!exclude_user) - val += paicrypt_getctr(cpump, i, false); + val += paicrypt_getctr(page, i, false); if (val) { userdata[outidx].num = i; userdata[outidx].value = val; @@ -395,25 +393,14 @@ static size_t paicrypt_copy(struct pai_userdata *userdata, return outidx * sizeof(struct pai_userdata); } -static int paicrypt_push_sample(void) +static int paicrypt_push_sample(size_t rawsize, struct paicrypt_map *cpump, + struct perf_event *event) { - struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr); - struct paicrypt_map *cpump = mp->mapptr; - struct perf_event *event = cpump->event; struct perf_sample_data data; struct perf_raw_record raw; struct pt_regs regs; - size_t rawsize; int overflow; - if (!cpump->event) /* No event active */ - return 0; - rawsize = paicrypt_copy(cpump->save, cpump, - cpump->event->attr.exclude_user, - cpump->event->attr.exclude_kernel); - if (!rawsize) /* No incremented counters */ - return 0; - /* Setup perf sample */ memset(&regs, 0, sizeof(regs)); memset(&raw, 0, sizeof(raw)); @@ -444,6 +431,25 @@ static int paicrypt_push_sample(void) return overflow; } +/* Check if there is data to be saved on schedule out of a task. */ +static int paicrypt_have_sample(void) +{ + struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr); + struct paicrypt_map *cpump = mp->mapptr; + struct perf_event *event = cpump->event; + size_t rawsize; + int rc = 0; + + if (!event) /* No event active */ + return 0; + rawsize = paicrypt_copy(cpump->save, cpump->page, + cpump->event->attr.exclude_user, + cpump->event->attr.exclude_kernel); + if (rawsize) /* Incremented counters */ + rc = paicrypt_push_sample(rawsize, cpump, event); + return rc; +} + /* Called on schedule-in and schedule-out. 
No access to event structure, * but for sampling only event CRYPTO_ALL is allowed. */ @@ -453,7 +459,7 @@ static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sch * results on schedule_out and if page was dirty, clear values. */ if (!sched_in) - paicrypt_push_sample(); + paicrypt_have_sample(); } /* Attribute definitions for paicrypt interface. As with other CPU diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c index e7013a2e8960508566083cfa9344e6daa60638fb..af7f2b538c8fd47a19f73029264462bd6c4fdcbd 100644 --- a/arch/s390/kernel/perf_pai_ext.c +++ b/arch/s390/kernel/perf_pai_ext.c @@ -276,9 +276,9 @@ static int paiext_event_init(struct perf_event *event) return 0; } -static u64 paiext_getctr(struct paiext_map *cpump, int nr) +static u64 paiext_getctr(unsigned long *area, int nr) { - return cpump->area[nr]; + return area[nr]; } /* Read the counter values. Return value from location in buffer. For event @@ -292,10 +292,11 @@ static u64 paiext_getdata(struct perf_event *event) int i; if (event->attr.config != PAI_NNPA_BASE) - return paiext_getctr(cpump, event->attr.config - PAI_NNPA_BASE); + return paiext_getctr(cpump->area, + event->attr.config - PAI_NNPA_BASE); for (i = 1; i <= paiext_cnt; i++) - sum += paiext_getctr(cpump, i); + sum += paiext_getctr(cpump->area, i); return sum; } @@ -320,11 +321,15 @@ static void paiext_start(struct perf_event *event, int flags) { u64 sum; - if (event->hw.last_tag) - return; - event->hw.last_tag = 1; - sum = paiext_getall(event); /* Get current value */ - local64_set(&event->hw.prev_count, sum); + if (!event->attr.sample_period) { /* Counting */ + if (!event->hw.last_tag) { + event->hw.last_tag = 1; + sum = paiext_getall(event); /* Get current value */ + local64_set(&event->hw.prev_count, sum); + } + } else { /* Sampling */ + perf_sched_cb_inc(event->pmu); + } } static int paiext_add(struct perf_event *event, int flags) @@ -341,21 +346,19 @@ static int paiext_add(struct perf_event 
*event, int flags) debug_sprintf_event(paiext_dbg, 4, "%s 1508 %llx acc %llx\n", __func__, S390_lowcore.aicd, pcb->acc); } - if (flags & PERF_EF_START && !event->attr.sample_period) { - /* Only counting needs initial counter value */ + cpump->event = event; + if (flags & PERF_EF_START) paiext_start(event, PERF_EF_RELOAD); - } event->hw.state = 0; - if (event->attr.sample_period) { - cpump->event = event; - perf_sched_cb_inc(event->pmu); - } return 0; } static void paiext_stop(struct perf_event *event, int flags) { - paiext_read(event); + if (!event->attr.sample_period) /* Counting */ + paiext_read(event); + else /* Sampling */ + perf_sched_cb_dec(event->pmu); event->hw.state = PERF_HES_STOPPED; } @@ -365,12 +368,7 @@ static void paiext_del(struct perf_event *event, int flags) struct paiext_map *cpump = mp->mapptr; struct paiext_cb *pcb = cpump->paiext_cb; - if (event->attr.sample_period) - perf_sched_cb_dec(event->pmu); - if (!event->attr.sample_period) { - /* Only counting needs to read counter */ - paiext_stop(event, PERF_EF_UPDATE); - } + paiext_stop(event, PERF_EF_UPDATE); if (--cpump->active_events == 0) { /* Disable CPU instruction lookup for PAIE1 control block */ local_ctl_clear_bit(0, CR0_PAI_EXTENSION_BIT); @@ -386,13 +384,12 @@ static void paiext_del(struct perf_event *event, int flags) * 2 bytes: Number of counter * 8 bytes: Value of counter */ -static size_t paiext_copy(struct paiext_map *cpump) +static size_t paiext_copy(struct pai_userdata *userdata, unsigned long *area) { - struct pai_userdata *userdata = cpump->save; int i, outidx = 0; for (i = 1; i <= paiext_cnt; i++) { - u64 val = paiext_getctr(cpump, i); + u64 val = paiext_getctr(area, i); if (val) { userdata[outidx].num = i; @@ -418,21 +415,14 @@ static size_t paiext_copy(struct paiext_map *cpump) * sched_task() callback. That callback is not active after paiext_del() * returns and has deleted the event on that CPU. 
*/ -static int paiext_push_sample(void) +static int paiext_push_sample(size_t rawsize, struct paiext_map *cpump, + struct perf_event *event) { - struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); - struct paiext_map *cpump = mp->mapptr; - struct perf_event *event = cpump->event; struct perf_sample_data data; struct perf_raw_record raw; struct pt_regs regs; - size_t rawsize; int overflow; - rawsize = paiext_copy(cpump); - if (!rawsize) /* No incremented counters */ - return 0; - /* Setup perf sample */ memset(&regs, 0, sizeof(regs)); memset(&raw, 0, sizeof(raw)); @@ -461,6 +451,23 @@ static int paiext_push_sample(void) return overflow; } +/* Check if there is data to be saved on schedule out of a task. */ +static int paiext_have_sample(void) +{ + struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); + struct paiext_map *cpump = mp->mapptr; + struct perf_event *event = cpump->event; + size_t rawsize; + int rc = 0; + + if (!event) + return 0; + rawsize = paiext_copy(cpump->save, cpump->area); + if (rawsize) /* Incremented counters */ + rc = paiext_push_sample(rawsize, cpump, event); + return rc; +} + /* Called on schedule-in and schedule-out. No access to event structure, * but for sampling only event NNPA_ALL is allowed. */ @@ -470,7 +477,7 @@ static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched * results on schedule_out and if page was dirty, clear values. */ if (!sched_in) - paiext_push_sample(); + paiext_have_sample(); } /* Attribute definitions for pai extension1 interface. As with other CPU diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 2e6754b62b2093c789e1de27e1d3b7105e6ee462..f1897a8bb221078cf3b0da87c1d614241e0544b9 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -917,7 +917,6 @@ static int s390_fpregs_set(struct task_struct *target, else memcpy(&fprs, target->thread.fpu.fprs, sizeof(fprs)); - /* If setting FPC, must validate it first.
*/ if (count > 0 && pos < offsetof(s390_fp_regs, fprs)) { u32 ufpc[2] = { target->thread.fpu.fpc, 0 }; rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ufpc, diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index ab4098886e562f64cd54055d1a249ea09c61c6c9..ac4c78546d973713859079520552148ae7b2c0b7 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -280,7 +280,6 @@ static void do_sigbus(struct pt_regs *regs) static void do_exception(struct pt_regs *regs, int access) { struct vm_area_struct *vma; - struct task_struct *tsk; unsigned long address; struct mm_struct *mm; enum fault_type type; @@ -289,7 +288,6 @@ static void do_exception(struct pt_regs *regs, int access) vm_fault_t fault; bool is_write; - tsk = current; /* * The instruction that caused the program check has * been nullified. Don't signal single step via SIGTRAP. @@ -297,7 +295,7 @@ static void do_exception(struct pt_regs *regs, int access) clear_thread_flag(TIF_PER_TRAP); if (kprobe_page_fault(regs, 14)) return; - mm = tsk->mm; + mm = current->mm; address = get_fault_address(regs); is_write = fault_is_write(regs); type = get_fault_type(regs); diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c index 5880893329310db5a6a65643b7641b4c6109973e..a90499c087f0c5e917c1f36072f56c0979951d1a 100644 --- a/arch/s390/pci/pci_mmio.c +++ b/arch/s390/pci/pci_mmio.c @@ -97,9 +97,9 @@ static inline int __memcpy_toio_inuser(void __iomem *dst, return -EINVAL; while (n > 0) { - size = zpci_get_max_write_size((u64 __force) dst, - (u64 __force) src, n, - ZPCI_MAX_WRITE_SIZE); + size = zpci_get_max_io_size((u64 __force) dst, + (u64 __force) src, n, + ZPCI_MAX_WRITE_SIZE); if (size > 8) /* main path */ rc = __pcistb_mio_inuser(dst, src, size, &status); else @@ -242,9 +242,9 @@ static inline int __memcpy_fromio_inuser(void __user *dst, u8 status; while (n > 0) { - size = zpci_get_max_write_size((u64 __force) src, - (u64 __force) dst, n, - ZPCI_MAX_READ_SIZE); + size = zpci_get_max_io_size((u64 
__force) src, + (u64 __force) dst, n, + ZPCI_MAX_READ_SIZE); rc = __pcilg_mio_inuser(dst, src, size, &status); if (rc) break; diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c index 0f279360838a4a7492c1ad86d38ac1f43173fc9f..30d117f9ad7eeafbc43bcaf921b94c941bb1cd8c 100644 --- a/arch/sh/boards/mach-ecovec24/setup.c +++ b/arch/sh/boards/mach-ecovec24/setup.c @@ -1220,7 +1220,7 @@ static int __init arch_setup(void) lcdc_info.ch[0].num_modes = ARRAY_SIZE(ecovec_dvi_modes); /* No backlight */ - gpio_backlight_data.fbdev = NULL; + gpio_backlight_data.dev = NULL; gpio_set_value(GPIO_PTA2, 1); gpio_set_value(GPIO_PTU1, 1); diff --git a/arch/sh/configs/sdk7786_defconfig b/arch/sh/configs/sdk7786_defconfig index cf59b98446e4d3e87c7fc9837ab5b06e9016f153..7b427c17fbfecb24d63e717023aad19ce1c953e8 100644 --- a/arch/sh/configs/sdk7786_defconfig +++ b/arch/sh/configs/sdk7786_defconfig @@ -171,7 +171,8 @@ CONFIG_BTRFS_FS=y CONFIG_AUTOFS_FS=m CONFIG_FUSE_FS=y CONFIG_CUSE=m -CONFIG_FSCACHE=m +CONFIG_NETFS_SUPPORT=m +CONFIG_FSCACHE=y CONFIG_CACHEFILES=m CONFIG_ISO9660_FS=m CONFIG_JOLIET=y diff --git a/arch/sh/include/asm/cacheflush.h b/arch/sh/include/asm/cacheflush.h index 878b6b551bd2d0119dd17f2918b73d3ae4120a77..51112f54552b329a307577a5a047f97172d56381 100644 --- a/arch/sh/include/asm/cacheflush.h +++ b/arch/sh/include/asm/cacheflush.h @@ -90,6 +90,7 @@ extern void copy_from_user_page(struct vm_area_struct *vma, unsigned long len); #define flush_cache_vmap(start, end) local_flush_cache_all(NULL) +#define flush_cache_vmap_early(start, end) do { } while (0) #define flush_cache_vunmap(start, end) local_flush_cache_all(NULL) #define flush_dcache_mmap_lock(mapping) do { } while (0) diff --git a/arch/sh/kernel/vsyscall/Makefile b/arch/sh/kernel/vsyscall/Makefile index 6e86644480488f692753a84950ea25e8235985c7..118744d349e21e43175017296ea978269a5a7ef4 100644 --- a/arch/sh/kernel/vsyscall/Makefile +++ b/arch/sh/kernel/vsyscall/Makefile @@ -1,11 +1,10 @@ 
# SPDX-License-Identifier: GPL-2.0 obj-y += vsyscall.o vsyscall-syscall.o vsyscall-syms.o -$(obj)/vsyscall-syscall.o: \ - $(foreach F,trapa,$(obj)/vsyscall-$F.so) +$(obj)/vsyscall-syscall.o: $(obj)/vsyscall-trapa.so # Teach kbuild about targets -targets += $(foreach F,trapa,vsyscall-$F.o vsyscall-$F.so) +targets += vsyscall-trapa.o vsyscall-traps.so targets += vsyscall-note.o vsyscall.lds vsyscall-dummy.o # The DSO images are built using a special linker script diff --git a/arch/sparc/include/asm/cacheflush_32.h b/arch/sparc/include/asm/cacheflush_32.h index f3b7270bf71b26ae7dcf77378d2b336363f307f5..9fee0ccfccb8e1b95a9d21ab293774fd6797eeb7 100644 --- a/arch/sparc/include/asm/cacheflush_32.h +++ b/arch/sparc/include/asm/cacheflush_32.h @@ -48,6 +48,7 @@ static inline void flush_dcache_page(struct page *page) #define flush_dcache_mmap_unlock(mapping) do { } while (0) #define flush_cache_vmap(start, end) flush_cache_all() +#define flush_cache_vmap_early(start, end) do { } while (0) #define flush_cache_vunmap(start, end) flush_cache_all() /* When a context switch happens we must flush all user windows so that diff --git a/arch/sparc/include/asm/cacheflush_64.h b/arch/sparc/include/asm/cacheflush_64.h index 0e879004efff16e69afadb5867731960f20f9781..2b1261b77ecd1b9f93bdecbd1fa08a1985b08171 100644 --- a/arch/sparc/include/asm/cacheflush_64.h +++ b/arch/sparc/include/asm/cacheflush_64.h @@ -75,6 +75,7 @@ void flush_ptrace_access(struct vm_area_struct *, struct page *, #define flush_dcache_mmap_unlock(mapping) do { } while (0) #define flush_cache_vmap(start, end) do { } while (0) +#define flush_cache_vmap_early(start, end) do { } while (0) #define flush_cache_vunmap(start, end) do { } while (0) #endif /* !__ASSEMBLY__ */ diff --git a/arch/sparc/kernel/pci_sabre.c b/arch/sparc/kernel/pci_sabre.c index 3c38ca40a22bace28681258759f98f38b334c4bf..a84598568300d331e035f25a78977b515cb9fbc5 100644 --- a/arch/sparc/kernel/pci_sabre.c +++ b/arch/sparc/kernel/pci_sabre.c @@ -13,7 
+13,10 @@ #include #include #include -#include +#include +#include +#include +#include #include #include @@ -456,7 +459,6 @@ static void sabre_pbm_init(struct pci_pbm_info *pbm, static const struct of_device_id sabre_match[]; static int sabre_probe(struct platform_device *op) { - const struct of_device_id *match; const struct linux_prom64_registers *pr_regs; struct device_node *dp = op->dev.of_node; struct pci_pbm_info *pbm; @@ -466,8 +468,7 @@ static int sabre_probe(struct platform_device *op) const u32 *vdma; u64 clear_irq; - match = of_match_device(sabre_match, &op->dev); - hummingbird_p = match && (match->data != NULL); + hummingbird_p = (uintptr_t)device_get_match_data(&op->dev); if (!hummingbird_p) { struct device_node *cpu_dp; diff --git a/arch/sparc/kernel/pci_schizo.c b/arch/sparc/kernel/pci_schizo.c index 23b47f7fdb1d5290dd97377b5eb283dc34fa3b9b..5d8dd49495863dc64d03809373e32887102f7baa 100644 --- a/arch/sparc/kernel/pci_schizo.c +++ b/arch/sparc/kernel/pci_schizo.c @@ -11,7 +11,10 @@ #include #include #include -#include +#include +#include +#include +#include #include #include @@ -1459,15 +1462,13 @@ out_err: return err; } -static const struct of_device_id schizo_match[]; static int schizo_probe(struct platform_device *op) { - const struct of_device_id *match; + unsigned long chip_type = (unsigned long)device_get_match_data(&op->dev); - match = of_match_device(schizo_match, &op->dev); - if (!match) + if (!chip_type) return -EINVAL; - return __schizo_init(op, (unsigned long)match->data); + return __schizo_init(op, chip_type); } /* The ordering of this table is very important. Some Tomatillo diff --git a/arch/sparc/vdso/Makefile b/arch/sparc/vdso/Makefile index d08c3a0443f3a77f8fe8cb388c09ca2f5e16f67c..7f5eedf1f5e0ad3ff23fcdbcf882ece7eced120c 100644 --- a/arch/sparc/vdso/Makefile +++ b/arch/sparc/vdso/Makefile @@ -3,9 +3,6 @@ # Building vDSO images for sparc. 
# -VDSO64-$(CONFIG_SPARC64) := y -VDSOCOMPAT-$(CONFIG_COMPAT) := y - # files to link into the vdso vobjs-y := vdso-note.o vclock_gettime.o @@ -13,22 +10,15 @@ vobjs-y := vdso-note.o vclock_gettime.o obj-y += vma.o # vDSO images to build -vdso_img-$(VDSO64-y) += 64 -vdso_img-$(VDSOCOMPAT-y) += 32 +obj-$(CONFIG_SPARC64) += vdso-image-64.o +obj-$(CONFIG_COMPAT) += vdso-image-32.o -vobjs := $(foreach F,$(vobjs-y),$(obj)/$F) +vobjs := $(addprefix $(obj)/, $(vobjs-y)) $(obj)/vdso.o: $(obj)/vdso.so targets += vdso.lds $(vobjs-y) - -# Build the vDSO image C files and link them in. -vdso_img_objs := $(vdso_img-y:%=vdso-image-%.o) -vdso_img_cfiles := $(vdso_img-y:%=vdso-image-%.c) -vdso_img_sodbg := $(vdso_img-y:%=vdso%.so.dbg) -obj-y += $(vdso_img_objs) -targets += $(vdso_img_cfiles) -targets += $(vdso_img_sodbg) $(vdso_img-y:%=vdso%.so) +targets += $(foreach x, 32 64, vdso-image-$(x).c vdso$(x).so vdso$(x).so.dbg) CPPFLAGS_vdso.lds += -P -C diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 00760fc3084334ed216706ee25db223aa88302bf..5edec175b9bfc92dfac8832fc3600b843407828b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -72,6 +72,7 @@ config X86 select ARCH_HAS_CACHE_LINE_SIZE select ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION select ARCH_HAS_CPU_FINALIZE_INIT + select ARCH_HAS_CPU_PASID if IOMMU_SVA select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEBUG_VM_PGTABLE if !X86_PAE @@ -1969,6 +1970,11 @@ config INTEL_TDX_HOST depends on CPU_SUP_INTEL depends on X86_64 depends on KVM_INTEL + depends on X86_X2APIC + select ARCH_KEEP_MEMBLOCK + depends on CONTIG_ALLOC + depends on !KEXEC_CORE + depends on X86_MCE help Intel Trust Domain Extensions (TDX) protects guest VMs from malicious host and certain physical attacks. 
This option enables necessary TDX diff --git a/arch/x86/coco/tdx/tdx-shared.c b/arch/x86/coco/tdx/tdx-shared.c index 78e413269791ee6e967873b6f417c418b771a820..1655aa56a0a5126ab6dcc3ec8221f9fbf8822eb5 100644 --- a/arch/x86/coco/tdx/tdx-shared.c +++ b/arch/x86/coco/tdx/tdx-shared.c @@ -22,13 +22,13 @@ static unsigned long try_accept_one(phys_addr_t start, unsigned long len, */ switch (pg_level) { case PG_LEVEL_4K: - page_size = 0; + page_size = TDX_PS_4K; break; case PG_LEVEL_2M: - page_size = 1; + page_size = TDX_PS_2M; break; case PG_LEVEL_1G: - page_size = 2; + page_size = TDX_PS_1G; break; default: return 0; diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 632c26cdeeda5ddc6d71e599acd65a278dd8dc01..29cb275a219d7fb38fa0d16e6ba48e91c9d032b4 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -198,6 +198,7 @@ #define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ #define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ #define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ +#define X86_FEATURE_TDX_HOST_PLATFORM ( 7*32+ 7) /* Platform supports being a TDX host */ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ #define X86_FEATURE_XCOMPACTED ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */ @@ -499,6 +500,7 @@ #define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */ #define X86_BUG_SMT_RSB X86_BUG(29) /* CPU is vulnerable to Cross-Thread Return Address Predictions */ #define X86_BUG_GDS X86_BUG(30) /* CPU is affected by Gather Data Sampling */ +#define X86_BUG_TDX_PW_MCE X86_BUG(31) /* CPU may incur #MC if non-TD software does partial write to TDX private memory */ /* BUG word 2 */ #define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */ diff --git a/arch/x86/include/asm/msr-index.h 
b/arch/x86/include/asm/msr-index.h index 737a52b89e64c11b22a7b902f308ee155eb6be33..f1bd7b91b3c63735738825f15cd3c82fca7579ce 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -541,6 +541,9 @@ #define MSR_RELOAD_PMC0 0x000014c1 #define MSR_RELOAD_FIXED_CTR0 0x00001309 +/* KeyID partitioning between MKTME and TDX */ +#define MSR_IA32_MKTME_KEYID_PARTITIONING 0x00000087 + /* * AMD64 MSRs. Not complete. See the architecture manual for a more * complete list. diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h index ccce7ebd8677287359c4de9271fe29ecf6793d31..fdfd41511b02118faa14bab179dbab9d2f588d18 100644 --- a/arch/x86/include/asm/shared/tdx.h +++ b/arch/x86/include/asm/shared/tdx.h @@ -55,6 +55,12 @@ (TDX_RDX | TDX_RBX | TDX_RSI | TDX_RDI | TDX_R8 | TDX_R9 | \ TDX_R10 | TDX_R11 | TDX_R12 | TDX_R13 | TDX_R14 | TDX_R15) +/* TDX supported page sizes from the TDX module ABI. */ +#define TDX_PS_4K 0 +#define TDX_PS_2M 1 +#define TDX_PS_1G 2 +#define TDX_PS_NR (TDX_PS_1G + 1) + #ifndef __ASSEMBLY__ #include diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h index f3d5305a60fc50b13708d80ffecab0a529239ee0..eba178996d8459b2ead1800e83157346f9c50c04 100644 --- a/arch/x86/include/asm/tdx.h +++ b/arch/x86/include/asm/tdx.h @@ -24,8 +24,16 @@ #define TDX_SEAMCALL_GP (TDX_SW_ERROR | X86_TRAP_GP) #define TDX_SEAMCALL_UD (TDX_SW_ERROR | X86_TRAP_UD) +/* + * TDX module SEAMCALL leaf function error codes + */ +#define TDX_SUCCESS 0ULL +#define TDX_RND_NO_ENTROPY 0x8000020300000000ULL + #ifndef __ASSEMBLY__ +#include + /* * Used by the #VE exception handler to gather the #VE exception * info from the TDX module. 
This is a software only structure @@ -83,6 +91,36 @@ static inline long tdx_kvm_hypercall(unsigned int nr, unsigned long p1, u64 __seamcall(u64 fn, struct tdx_module_args *args); u64 __seamcall_ret(u64 fn, struct tdx_module_args *args); u64 __seamcall_saved_ret(u64 fn, struct tdx_module_args *args); +void tdx_init(void); + +#include + +typedef u64 (*sc_func_t)(u64 fn, struct tdx_module_args *args); + +static inline u64 sc_retry(sc_func_t func, u64 fn, + struct tdx_module_args *args) +{ + int retry = RDRAND_RETRY_LOOPS; + u64 ret; + + do { + ret = func(fn, args); + } while (ret == TDX_RND_NO_ENTROPY && --retry); + + return ret; +} + +#define seamcall(_fn, _args) sc_retry(__seamcall, (_fn), (_args)) +#define seamcall_ret(_fn, _args) sc_retry(__seamcall_ret, (_fn), (_args)) +#define seamcall_saved_ret(_fn, _args) sc_retry(__seamcall_saved_ret, (_fn), (_args)) +int tdx_cpu_enable(void); +int tdx_enable(void); +const char *tdx_dump_mce_info(struct mce *m); +#else +static inline void tdx_init(void) { } +static inline int tdx_cpu_enable(void) { return -ENODEV; } +static inline int tdx_enable(void) { return -ENODEV; } +static inline const char *tdx_dump_mce_info(struct mce *m) { return NULL; } #endif /* CONFIG_INTEL_TDX_HOST */ #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 94bff381ef20298f0f31a4753a085daca40bf03a..0b97bcde70c6102a4b82b561c3256ec53b614770 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -66,6 +66,7 @@ #include #include #include +#include #include "cpu.h" @@ -1986,6 +1987,7 @@ static __init void identify_boot_cpu(void) setup_cr_pinning(); tsx_init(); + tdx_init(); lkgs_init(); } diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index fd5ce12c4f9aa502c200a78dda3741ba70c068dc..bc39252bc54f2ec8a834961639d180b3f84c13ac 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -53,6 +53,7 @@ #include #include #include 
+#include #include "internal.h" @@ -229,12 +230,20 @@ static void wait_for_panic(void) panic("Panicing machine check CPU died"); } +static const char *mce_dump_aux_info(struct mce *m) +{ + if (boot_cpu_has_bug(X86_BUG_TDX_PW_MCE)) + return tdx_dump_mce_info(m); + + return NULL; +} + static noinstr void mce_panic(const char *msg, struct mce *final, char *exp) { struct llist_node *pending; struct mce_evt_llist *l; int apei_err = 0; - struct page *p; + const char *memmsg; /* * Allow instrumentation around external facilities usage. Not that it @@ -285,6 +294,11 @@ static noinstr void mce_panic(const char *msg, struct mce *final, char *exp) } if (exp) pr_emerg(HW_ERR "Machine check: %s\n", exp); + + memmsg = mce_dump_aux_info(final); + if (memmsg) + pr_emerg(HW_ERR "Machine check: %s\n", memmsg); + if (!fake_panic) { if (panic_timeout == 0) panic_timeout = mca_cfg.panic_timeout; @@ -297,6 +311,7 @@ static noinstr void mce_panic(const char *msg, struct mce *final, char *exp) */ if (kexec_crash_loaded()) { if (final && (final->status & MCI_STATUS_ADDRV)) { + struct page *p; p = pfn_to_online_page(final->addr >> PAGE_SHIFT); if (p) SetPageHWPoison(p); diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 8ff2bf921519dd4da308636c121bb0e16968abe7..a38d0c93a66e825a38987a65fdd09ad97bbae2ec 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -1438,7 +1438,7 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id) memset(&curr_time, 0, sizeof(struct rtc_time)); if (hpet_rtc_flags & (RTC_UIE | RTC_AIE)) { - if (unlikely(mc146818_get_time(&curr_time) < 0)) { + if (unlikely(mc146818_get_time(&curr_time, 10) < 0)) { pr_err_ratelimited("unable to read current time from RTC\n"); return IRQ_HANDLED; } diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 1309b9b053386b8470619511d9c435b9d7ab68d0..2e7066980f3e8bf665613ab4fd2f9ccb03ad7627 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c @@ -67,7 +67,7 @@ void mach_get_cmos_time(struct 
timespec64 *now) return; } - if (mc146818_get_time(&tm)) { + if (mc146818_get_time(&tm, 1000)) { pr_err("Unable to read current time from RTC\n"); now->tv_sec = now->tv_nsec = 0; return; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index ec2c21a1844e3e8e128ead89213588ec6828bca9..84201071dfacd186da34cdca12cbda41a39eedf1 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1031,6 +1031,8 @@ void __init setup_arch(char **cmdline_p) * * Moreover, on machines with SandyBridge graphics or in setups that use * crashkernel the entire 1M is reserved anyway. + * + * Note the host kernel TDX also requires the first 1MB being reserved. */ x86_platform.realmode_reserve(); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index b0737a15c470251b1222a52afe28e0d0a82b74d2..c3b2f863acf0f3f28c7402c86de8cbaa47eb930c 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -566,7 +566,7 @@ static bool fixup_iopl_exception(struct pt_regs *regs) */ static bool try_fixup_enqcmd_gp(void) { -#ifdef CONFIG_IOMMU_SVA +#ifdef CONFIG_ARCH_HAS_CPU_PASID u32 pasid; /* @@ -592,7 +592,7 @@ static bool try_fixup_enqcmd_gp(void) if (!mm_valid_pasid(current->mm)) return false; - pasid = current->mm->pasid; + pasid = mm_get_enqcmd_pasid(current->mm); /* * Did this thread already have its PASID activated? 
diff --git a/arch/x86/virt/vmx/tdx/Makefile b/arch/x86/virt/vmx/tdx/Makefile index 46ef8f73aebbb1520b12233ac208db33355b0678..90da47eb85eec3a0f4d64b2ffa796f6d0c71013f 100644 --- a/arch/x86/virt/vmx/tdx/Makefile +++ b/arch/x86/virt/vmx/tdx/Makefile @@ -1,2 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-y += seamcall.o +obj-y += seamcall.o tdx.o diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c new file mode 100644 index 0000000000000000000000000000000000000000..4d6826a76f7881895891fc5aa8ba9d0c8f2bc248 --- /dev/null +++ b/arch/x86/virt/vmx/tdx/tdx.c @@ -0,0 +1,1492 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright(c) 2023 Intel Corporation. + * + * Intel Trusted Domain Extensions (TDX) support + */ + +#define pr_fmt(fmt) "virt/tdx: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "tdx.h" + +static u32 tdx_global_keyid __ro_after_init; +static u32 tdx_guest_keyid_start __ro_after_init; +static u32 tdx_nr_guest_keyids __ro_after_init; + +static DEFINE_PER_CPU(bool, tdx_lp_initialized); + +static struct tdmr_info_list tdx_tdmr_list; + +static enum tdx_module_status_t tdx_module_status; +static DEFINE_MUTEX(tdx_module_lock); + +/* All TDX-usable memory regions. Protected by mem_hotplug_lock. 
*/ +static LIST_HEAD(tdx_memlist); + +typedef void (*sc_err_func_t)(u64 fn, u64 err, struct tdx_module_args *args); + +static inline void seamcall_err(u64 fn, u64 err, struct tdx_module_args *args) +{ + pr_err("SEAMCALL (0x%016llx) failed: 0x%016llx\n", fn, err); +} + +static inline void seamcall_err_ret(u64 fn, u64 err, + struct tdx_module_args *args) +{ + seamcall_err(fn, err, args); + pr_err("RCX 0x%016llx RDX 0x%016llx R08 0x%016llx\n", + args->rcx, args->rdx, args->r8); + pr_err("R09 0x%016llx R10 0x%016llx R11 0x%016llx\n", + args->r9, args->r10, args->r11); +} + +static inline int sc_retry_prerr(sc_func_t func, sc_err_func_t err_func, + u64 fn, struct tdx_module_args *args) +{ + u64 sret = sc_retry(func, fn, args); + + if (sret == TDX_SUCCESS) + return 0; + + if (sret == TDX_SEAMCALL_VMFAILINVALID) + return -ENODEV; + + if (sret == TDX_SEAMCALL_GP) + return -EOPNOTSUPP; + + if (sret == TDX_SEAMCALL_UD) + return -EACCES; + + err_func(fn, sret, args); + return -EIO; +} + +#define seamcall_prerr(__fn, __args) \ + sc_retry_prerr(__seamcall, seamcall_err, (__fn), (__args)) + +#define seamcall_prerr_ret(__fn, __args) \ + sc_retry_prerr(__seamcall_ret, seamcall_err_ret, (__fn), (__args)) + +/* + * Do the module global initialization once and return its result. + * It can be done on any cpu. It's always called with interrupts + * disabled. + */ +static int try_init_module_global(void) +{ + struct tdx_module_args args = {}; + static DEFINE_RAW_SPINLOCK(sysinit_lock); + static bool sysinit_done; + static int sysinit_ret; + + lockdep_assert_irqs_disabled(); + + raw_spin_lock(&sysinit_lock); + + if (sysinit_done) + goto out; + + /* RCX is module attributes and all bits are reserved */ + args.rcx = 0; + sysinit_ret = seamcall_prerr(TDH_SYS_INIT, &args); + + /* + * The first SEAMCALL also detects the TDX module, thus + * it can fail due to the TDX module is not loaded. + * Dump message to let the user know. 
+ */ + if (sysinit_ret == -ENODEV) + pr_err("module not loaded\n"); + + sysinit_done = true; +out: + raw_spin_unlock(&sysinit_lock); + return sysinit_ret; +} + +/** + * tdx_cpu_enable - Enable TDX on local cpu + * + * Do one-time TDX module per-cpu initialization SEAMCALL (and TDX module + * global initialization SEAMCALL if not done) on local cpu to make this + * cpu be ready to run any other SEAMCALLs. + * + * Always call this function via IPI function calls. + * + * Return 0 on success, otherwise errors. + */ +int tdx_cpu_enable(void) +{ + struct tdx_module_args args = {}; + int ret; + + if (!boot_cpu_has(X86_FEATURE_TDX_HOST_PLATFORM)) + return -ENODEV; + + lockdep_assert_irqs_disabled(); + + if (__this_cpu_read(tdx_lp_initialized)) + return 0; + + /* + * The TDX module global initialization is the very first step + * to enable TDX. Need to do it first (if hasn't been done) + * before the per-cpu initialization. + */ + ret = try_init_module_global(); + if (ret) + return ret; + + ret = seamcall_prerr(TDH_SYS_LP_INIT, &args); + if (ret) + return ret; + + __this_cpu_write(tdx_lp_initialized, true); + + return 0; +} +EXPORT_SYMBOL_GPL(tdx_cpu_enable); + +/* + * Add a memory region as a TDX memory block. The caller must make sure + * all memory regions are added in address ascending order and don't + * overlap. 
+ */ +static int add_tdx_memblock(struct list_head *tmb_list, unsigned long start_pfn, + unsigned long end_pfn, int nid) +{ + struct tdx_memblock *tmb; + + tmb = kmalloc(sizeof(*tmb), GFP_KERNEL); + if (!tmb) + return -ENOMEM; + + INIT_LIST_HEAD(&tmb->list); + tmb->start_pfn = start_pfn; + tmb->end_pfn = end_pfn; + tmb->nid = nid; + + /* @tmb_list is protected by mem_hotplug_lock */ + list_add_tail(&tmb->list, tmb_list); + return 0; +} + +static void free_tdx_memlist(struct list_head *tmb_list) +{ + /* @tmb_list is protected by mem_hotplug_lock */ + while (!list_empty(tmb_list)) { + struct tdx_memblock *tmb = list_first_entry(tmb_list, + struct tdx_memblock, list); + + list_del(&tmb->list); + kfree(tmb); + } +} + +/* + * Ensure that all memblock memory regions are convertible to TDX + * memory. Once this has been established, stash the memblock + * ranges off in a secondary structure because memblock is modified + * in memory hotplug while TDX memory regions are fixed. + */ +static int build_tdx_memlist(struct list_head *tmb_list) +{ + unsigned long start_pfn, end_pfn; + int i, nid, ret; + + for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) { + /* + * The first 1MB is not reported as TDX convertible memory. + * Although the first 1MB is always reserved and won't end up + * to the page allocator, it is still in memblock's memory + * regions. Skip them manually to exclude them as TDX memory. + */ + start_pfn = max(start_pfn, PHYS_PFN(SZ_1M)); + if (start_pfn >= end_pfn) + continue; + + /* + * Add the memory regions as TDX memory. The regions in + * memblock has already guaranteed they are in address + * ascending order and don't overlap. 
+ */ + ret = add_tdx_memblock(tmb_list, start_pfn, end_pfn, nid); + if (ret) + goto err; + } + + return 0; +err: + free_tdx_memlist(tmb_list); + return ret; +} + +static int read_sys_metadata_field(u64 field_id, u64 *data) +{ + struct tdx_module_args args = {}; + int ret; + + /* + * TDH.SYS.RD -- reads one global metadata field + * - RDX (in): the field to read + * - R8 (out): the field data + */ + args.rdx = field_id; + ret = seamcall_prerr_ret(TDH_SYS_RD, &args); + if (ret) + return ret; + + *data = args.r8; + + return 0; +} + +static int read_sys_metadata_field16(u64 field_id, + int offset, + struct tdx_tdmr_sysinfo *ts) +{ + u16 *ts_member = ((void *)ts) + offset; + u64 tmp; + int ret; + + if (WARN_ON_ONCE(MD_FIELD_ID_ELE_SIZE_CODE(field_id) != + MD_FIELD_ID_ELE_SIZE_16BIT)) + return -EINVAL; + + ret = read_sys_metadata_field(field_id, &tmp); + if (ret) + return ret; + + *ts_member = tmp; + + return 0; +} + +struct field_mapping { + u64 field_id; + int offset; +}; + +#define TD_SYSINFO_MAP(_field_id, _offset) \ + { .field_id = MD_FIELD_ID_##_field_id, \ + .offset = offsetof(struct tdx_tdmr_sysinfo, _offset) } + +/* Map TD_SYSINFO fields into 'struct tdx_tdmr_sysinfo': */ +static const struct field_mapping fields[] = { + TD_SYSINFO_MAP(MAX_TDMRS, max_tdmrs), + TD_SYSINFO_MAP(MAX_RESERVED_PER_TDMR, max_reserved_per_tdmr), + TD_SYSINFO_MAP(PAMT_4K_ENTRY_SIZE, pamt_entry_size[TDX_PS_4K]), + TD_SYSINFO_MAP(PAMT_2M_ENTRY_SIZE, pamt_entry_size[TDX_PS_2M]), + TD_SYSINFO_MAP(PAMT_1G_ENTRY_SIZE, pamt_entry_size[TDX_PS_1G]), +}; + +static int get_tdx_tdmr_sysinfo(struct tdx_tdmr_sysinfo *tdmr_sysinfo) +{ + int ret; + int i; + + /* Populate 'tdmr_sysinfo' fields using the mapping structure above: */ + for (i = 0; i < ARRAY_SIZE(fields); i++) { + ret = read_sys_metadata_field16(fields[i].field_id, + fields[i].offset, + tdmr_sysinfo); + if (ret) + return ret; + } + + return 0; +} + +/* Calculate the actual TDMR size */ +static int tdmr_size_single(u16 max_reserved_per_tdmr) 
+{ + int tdmr_sz; + + /* + * The actual size of TDMR depends on the maximum + * number of reserved areas. + */ + tdmr_sz = sizeof(struct tdmr_info); + tdmr_sz += sizeof(struct tdmr_reserved_area) * max_reserved_per_tdmr; + + return ALIGN(tdmr_sz, TDMR_INFO_ALIGNMENT); +} + +static int alloc_tdmr_list(struct tdmr_info_list *tdmr_list, + struct tdx_tdmr_sysinfo *tdmr_sysinfo) +{ + size_t tdmr_sz, tdmr_array_sz; + void *tdmr_array; + + tdmr_sz = tdmr_size_single(tdmr_sysinfo->max_reserved_per_tdmr); + tdmr_array_sz = tdmr_sz * tdmr_sysinfo->max_tdmrs; + + /* + * To keep things simple, allocate all TDMRs together. + * The buffer needs to be physically contiguous to make + * sure each TDMR is physically contiguous. + */ + tdmr_array = alloc_pages_exact(tdmr_array_sz, + GFP_KERNEL | __GFP_ZERO); + if (!tdmr_array) + return -ENOMEM; + + tdmr_list->tdmrs = tdmr_array; + + /* + * Keep the size of TDMR to find the target TDMR + * at a given index in the TDMR list. + */ + tdmr_list->tdmr_sz = tdmr_sz; + tdmr_list->max_tdmrs = tdmr_sysinfo->max_tdmrs; + tdmr_list->nr_consumed_tdmrs = 0; + + return 0; +} + +static void free_tdmr_list(struct tdmr_info_list *tdmr_list) +{ + free_pages_exact(tdmr_list->tdmrs, + tdmr_list->max_tdmrs * tdmr_list->tdmr_sz); +} + +/* Get the TDMR from the list at the given index. */ +static struct tdmr_info *tdmr_entry(struct tdmr_info_list *tdmr_list, + int idx) +{ + int tdmr_info_offset = tdmr_list->tdmr_sz * idx; + + return (void *)tdmr_list->tdmrs + tdmr_info_offset; +} + +#define TDMR_ALIGNMENT SZ_1G +#define TDMR_ALIGN_DOWN(_addr) ALIGN_DOWN((_addr), TDMR_ALIGNMENT) +#define TDMR_ALIGN_UP(_addr) ALIGN((_addr), TDMR_ALIGNMENT) + +static inline u64 tdmr_end(struct tdmr_info *tdmr) +{ + return tdmr->base + tdmr->size; +} + +/* + * Take the memory referenced in @tmb_list and populate the + * preallocated @tdmr_list, following all the special alignment + * and size rules for TDMR. 
+ */ +static int fill_out_tdmrs(struct list_head *tmb_list, + struct tdmr_info_list *tdmr_list) +{ + struct tdx_memblock *tmb; + int tdmr_idx = 0; + + /* + * Loop over TDX memory regions and fill out TDMRs to cover them. + * To keep it simple, always try to use one TDMR to cover one + * memory region. + * + * In practice TDX supports at least 64 TDMRs. A 2-socket system + * typically only consumes less than 10 of those. This code is + * dumb and simple and may use more TDMRs than is strictly + * required. + */ + list_for_each_entry(tmb, tmb_list, list) { + struct tdmr_info *tdmr = tdmr_entry(tdmr_list, tdmr_idx); + u64 start, end; + + start = TDMR_ALIGN_DOWN(PFN_PHYS(tmb->start_pfn)); + end = TDMR_ALIGN_UP(PFN_PHYS(tmb->end_pfn)); + + /* + * A valid size indicates the current TDMR has already + * been filled out to cover the previous memory region(s). + */ + if (tdmr->size) { + /* + * Loop to the next if the current memory region + * has already been fully covered. + */ + if (end <= tdmr_end(tdmr)) + continue; + + /* Otherwise, skip the already covered part. */ + if (start < tdmr_end(tdmr)) + start = tdmr_end(tdmr); + + /* + * Create a new TDMR to cover the current memory + * region, or the remaining part of it. + */ + tdmr_idx++; + if (tdmr_idx >= tdmr_list->max_tdmrs) { + pr_warn("initialization failed: TDMRs exhausted.\n"); + return -ENOSPC; + } + + tdmr = tdmr_entry(tdmr_list, tdmr_idx); + } + + tdmr->base = start; + tdmr->size = end - start; + } + + /* @tdmr_idx is always the index of the last valid TDMR. */ + tdmr_list->nr_consumed_tdmrs = tdmr_idx + 1; + + /* + * Warn early that kernel is about to run out of TDMRs. + * + * This is an indication that TDMR allocation has to be + * reworked to be smarter to not run into an issue.
+ */ + if (tdmr_list->max_tdmrs - tdmr_list->nr_consumed_tdmrs < TDMR_NR_WARN) + pr_warn("consumed TDMRs reaching limit: %d used out of %d\n", + tdmr_list->nr_consumed_tdmrs, + tdmr_list->max_tdmrs); + + return 0; +} + +/* + * Calculate PAMT size given a TDMR and a page size. The returned + * PAMT size is always aligned up to 4K page boundary. + */ +static unsigned long tdmr_get_pamt_sz(struct tdmr_info *tdmr, int pgsz, + u16 pamt_entry_size) +{ + unsigned long pamt_sz, nr_pamt_entries; + + switch (pgsz) { + case TDX_PS_4K: + nr_pamt_entries = tdmr->size >> PAGE_SHIFT; + break; + case TDX_PS_2M: + nr_pamt_entries = tdmr->size >> PMD_SHIFT; + break; + case TDX_PS_1G: + nr_pamt_entries = tdmr->size >> PUD_SHIFT; + break; + default: + WARN_ON_ONCE(1); + return 0; + } + + pamt_sz = nr_pamt_entries * pamt_entry_size; + /* TDX requires PAMT size must be 4K aligned */ + pamt_sz = ALIGN(pamt_sz, PAGE_SIZE); + + return pamt_sz; +} + +/* + * Locate a NUMA node which should hold the allocation of the @tdmr + * PAMT. This node will have some memory covered by the TDMR. The + * relative amount of memory covered is not considered. + */ +static int tdmr_get_nid(struct tdmr_info *tdmr, struct list_head *tmb_list) +{ + struct tdx_memblock *tmb; + + /* + * A TDMR must cover at least part of one TMB. That TMB will end + * after the TDMR begins. But, that TMB may have started before + * the TDMR. Find the next 'tmb' that _ends_ after this TDMR + * begins. Ignore 'tmb' start addresses. They are irrelevant. + */ + list_for_each_entry(tmb, tmb_list, list) { + if (tmb->end_pfn > PHYS_PFN(tdmr->base)) + return tmb->nid; + } + + /* + * Fall back to allocating the TDMR's metadata from node 0 when + * no TDX memory block can be found. This should never happen + * since TDMRs originate from TDX memory blocks. 
+ */ + pr_warn("TDMR [0x%llx, 0x%llx): unable to find local NUMA node for PAMT allocation, fallback to use node 0.\n", + tdmr->base, tdmr_end(tdmr)); + return 0; +} + +/* + * Allocate PAMTs from the local NUMA node of some memory in @tmb_list + * within @tdmr, and set up PAMTs for @tdmr. + */ +static int tdmr_set_up_pamt(struct tdmr_info *tdmr, + struct list_head *tmb_list, + u16 pamt_entry_size[]) +{ + unsigned long pamt_base[TDX_PS_NR]; + unsigned long pamt_size[TDX_PS_NR]; + unsigned long tdmr_pamt_base; + unsigned long tdmr_pamt_size; + struct page *pamt; + int pgsz, nid; + + nid = tdmr_get_nid(tdmr, tmb_list); + + /* + * Calculate the PAMT size for each TDX supported page size + * and the total PAMT size. + */ + tdmr_pamt_size = 0; + for (pgsz = TDX_PS_4K; pgsz < TDX_PS_NR; pgsz++) { + pamt_size[pgsz] = tdmr_get_pamt_sz(tdmr, pgsz, + pamt_entry_size[pgsz]); + tdmr_pamt_size += pamt_size[pgsz]; + } + + /* + * Allocate one chunk of physically contiguous memory for all + * PAMTs. This helps minimize the PAMT's use of reserved areas + * in overlapped TDMRs. + */ + pamt = alloc_contig_pages(tdmr_pamt_size >> PAGE_SHIFT, GFP_KERNEL, + nid, &node_online_map); + if (!pamt) + return -ENOMEM; + + /* + * Break the contiguous allocation back up into the + * individual PAMTs for each page size. + */ + tdmr_pamt_base = page_to_pfn(pamt) << PAGE_SHIFT; + for (pgsz = TDX_PS_4K; pgsz < TDX_PS_NR; pgsz++) { + pamt_base[pgsz] = tdmr_pamt_base; + tdmr_pamt_base += pamt_size[pgsz]; + } + + tdmr->pamt_4k_base = pamt_base[TDX_PS_4K]; + tdmr->pamt_4k_size = pamt_size[TDX_PS_4K]; + tdmr->pamt_2m_base = pamt_base[TDX_PS_2M]; + tdmr->pamt_2m_size = pamt_size[TDX_PS_2M]; + tdmr->pamt_1g_base = pamt_base[TDX_PS_1G]; + tdmr->pamt_1g_size = pamt_size[TDX_PS_1G]; + + return 0; +} + +static void tdmr_get_pamt(struct tdmr_info *tdmr, unsigned long *pamt_base, + unsigned long *pamt_size) +{ + unsigned long pamt_bs, pamt_sz; + + /* + * The PAMT was allocated in one contiguous unit. 
The 4K PAMT + * should always point to the beginning of that allocation. + */ + pamt_bs = tdmr->pamt_4k_base; + pamt_sz = tdmr->pamt_4k_size + tdmr->pamt_2m_size + tdmr->pamt_1g_size; + + WARN_ON_ONCE((pamt_bs & ~PAGE_MASK) || (pamt_sz & ~PAGE_MASK)); + + *pamt_base = pamt_bs; + *pamt_size = pamt_sz; +} + +static void tdmr_do_pamt_func(struct tdmr_info *tdmr, + void (*pamt_func)(unsigned long base, unsigned long size)) +{ + unsigned long pamt_base, pamt_size; + + tdmr_get_pamt(tdmr, &pamt_base, &pamt_size); + + /* Do nothing if PAMT hasn't been allocated for this TDMR */ + if (!pamt_size) + return; + + if (WARN_ON_ONCE(!pamt_base)) + return; + + pamt_func(pamt_base, pamt_size); +} + +static void free_pamt(unsigned long pamt_base, unsigned long pamt_size) +{ + free_contig_range(pamt_base >> PAGE_SHIFT, pamt_size >> PAGE_SHIFT); +} + +static void tdmr_free_pamt(struct tdmr_info *tdmr) +{ + tdmr_do_pamt_func(tdmr, free_pamt); +} + +static void tdmrs_free_pamt_all(struct tdmr_info_list *tdmr_list) +{ + int i; + + for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++) + tdmr_free_pamt(tdmr_entry(tdmr_list, i)); +} + +/* Allocate and set up PAMTs for all TDMRs */ +static int tdmrs_set_up_pamt_all(struct tdmr_info_list *tdmr_list, + struct list_head *tmb_list, + u16 pamt_entry_size[]) +{ + int i, ret = 0; + + for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++) { + ret = tdmr_set_up_pamt(tdmr_entry(tdmr_list, i), tmb_list, + pamt_entry_size); + if (ret) + goto err; + } + + return 0; +err: + tdmrs_free_pamt_all(tdmr_list); + return ret; +} + +/* + * Convert TDX private pages back to normal by using MOVDIR64B to + * clear these pages. Note this function doesn't flush cache of + * these TDX private pages. The caller should make sure of that. 
+ */ +static void reset_tdx_pages(unsigned long base, unsigned long size) +{ + const void *zero_page = (const void *)page_address(ZERO_PAGE(0)); + unsigned long phys, end; + + end = base + size; + for (phys = base; phys < end; phys += 64) + movdir64b(__va(phys), zero_page); + + /* + * MOVDIR64B uses WC protocol. Use memory barrier to + * make sure any later user of these pages sees the + * updated data. + */ + mb(); +} + +static void tdmr_reset_pamt(struct tdmr_info *tdmr) +{ + tdmr_do_pamt_func(tdmr, reset_tdx_pages); +} + +static void tdmrs_reset_pamt_all(struct tdmr_info_list *tdmr_list) +{ + int i; + + for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++) + tdmr_reset_pamt(tdmr_entry(tdmr_list, i)); +} + +static unsigned long tdmrs_count_pamt_kb(struct tdmr_info_list *tdmr_list) +{ + unsigned long pamt_size = 0; + int i; + + for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++) { + unsigned long base, size; + + tdmr_get_pamt(tdmr_entry(tdmr_list, i), &base, &size); + pamt_size += size; + } + + return pamt_size / 1024; +} + +static int tdmr_add_rsvd_area(struct tdmr_info *tdmr, int *p_idx, u64 addr, + u64 size, u16 max_reserved_per_tdmr) +{ + struct tdmr_reserved_area *rsvd_areas = tdmr->reserved_areas; + int idx = *p_idx; + + /* Reserved area must be 4K aligned in offset and size */ + if (WARN_ON(addr & ~PAGE_MASK || size & ~PAGE_MASK)) + return -EINVAL; + + if (idx >= max_reserved_per_tdmr) { + pr_warn("initialization failed: TDMR [0x%llx, 0x%llx): reserved areas exhausted.\n", + tdmr->base, tdmr_end(tdmr)); + return -ENOSPC; + } + + /* + * Consume one reserved area per call. Make no effort to + * optimize or reduce the number of reserved areas which are + * consumed by contiguous reserved areas, for instance. + */ + rsvd_areas[idx].offset = addr - tdmr->base; + rsvd_areas[idx].size = size; + + *p_idx = idx + 1; + + return 0; +} + +/* + * Go through @tmb_list to find holes between memory areas. 
If any of + * those holes fall within @tdmr, set up a TDMR reserved area to cover + * the hole. + */ +static int tdmr_populate_rsvd_holes(struct list_head *tmb_list, + struct tdmr_info *tdmr, + int *rsvd_idx, + u16 max_reserved_per_tdmr) +{ + struct tdx_memblock *tmb; + u64 prev_end; + int ret; + + /* + * Start looking for reserved blocks at the + * beginning of the TDMR. + */ + prev_end = tdmr->base; + list_for_each_entry(tmb, tmb_list, list) { + u64 start, end; + + start = PFN_PHYS(tmb->start_pfn); + end = PFN_PHYS(tmb->end_pfn); + + /* Break if this region is after the TDMR */ + if (start >= tdmr_end(tdmr)) + break; + + /* Exclude regions before this TDMR */ + if (end < tdmr->base) + continue; + + /* + * Skip over memory areas that + * have already been dealt with. + */ + if (start <= prev_end) { + prev_end = end; + continue; + } + + /* Add the hole before this region */ + ret = tdmr_add_rsvd_area(tdmr, rsvd_idx, prev_end, + start - prev_end, + max_reserved_per_tdmr); + if (ret) + return ret; + + prev_end = end; + } + + /* Add the hole after the last region if it exists. */ + if (prev_end < tdmr_end(tdmr)) { + ret = tdmr_add_rsvd_area(tdmr, rsvd_idx, prev_end, + tdmr_end(tdmr) - prev_end, + max_reserved_per_tdmr); + if (ret) + return ret; + } + + return 0; +} + +/* + * Go through @tdmr_list to find all PAMTs. If any of those PAMTs + * overlaps with @tdmr, set up a TDMR reserved area to cover the + * overlapping part. 
+ */ +static int tdmr_populate_rsvd_pamts(struct tdmr_info_list *tdmr_list, + struct tdmr_info *tdmr, + int *rsvd_idx, + u16 max_reserved_per_tdmr) +{ + int i, ret; + + for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++) { + struct tdmr_info *tmp = tdmr_entry(tdmr_list, i); + unsigned long pamt_base, pamt_size, pamt_end; + + tdmr_get_pamt(tmp, &pamt_base, &pamt_size); + /* Each TDMR must already have PAMT allocated */ + WARN_ON_ONCE(!pamt_size || !pamt_base); + + pamt_end = pamt_base + pamt_size; + /* Skip PAMTs outside of the given TDMR */ + if ((pamt_end <= tdmr->base) || + (pamt_base >= tdmr_end(tdmr))) + continue; + + /* Only mark the part within the TDMR as reserved */ + if (pamt_base < tdmr->base) + pamt_base = tdmr->base; + if (pamt_end > tdmr_end(tdmr)) + pamt_end = tdmr_end(tdmr); + + ret = tdmr_add_rsvd_area(tdmr, rsvd_idx, pamt_base, + pamt_end - pamt_base, + max_reserved_per_tdmr); + if (ret) + return ret; + } + + return 0; +} + +/* Compare function called by sort() for TDMR reserved areas */ +static int rsvd_area_cmp_func(const void *a, const void *b) +{ + struct tdmr_reserved_area *r1 = (struct tdmr_reserved_area *)a; + struct tdmr_reserved_area *r2 = (struct tdmr_reserved_area *)b; + + if (r1->offset + r1->size <= r2->offset) + return -1; + if (r1->offset >= r2->offset + r2->size) + return 1; + + /* Reserved areas cannot overlap. The caller must guarantee. */ + WARN_ON_ONCE(1); + return -1; +} + +/* + * Populate reserved areas for the given @tdmr, including memory holes + * (via @tmb_list) and PAMTs (via @tdmr_list). 
+ */ +static int tdmr_populate_rsvd_areas(struct tdmr_info *tdmr, + struct list_head *tmb_list, + struct tdmr_info_list *tdmr_list, + u16 max_reserved_per_tdmr) +{ + int ret, rsvd_idx = 0; + + ret = tdmr_populate_rsvd_holes(tmb_list, tdmr, &rsvd_idx, + max_reserved_per_tdmr); + if (ret) + return ret; + + ret = tdmr_populate_rsvd_pamts(tdmr_list, tdmr, &rsvd_idx, + max_reserved_per_tdmr); + if (ret) + return ret; + + /* TDX requires reserved areas listed in address ascending order */ + sort(tdmr->reserved_areas, rsvd_idx, sizeof(struct tdmr_reserved_area), + rsvd_area_cmp_func, NULL); + + return 0; +} + +/* + * Populate reserved areas for all TDMRs in @tdmr_list, including memory + * holes (via @tmb_list) and PAMTs. + */ +static int tdmrs_populate_rsvd_areas_all(struct tdmr_info_list *tdmr_list, + struct list_head *tmb_list, + u16 max_reserved_per_tdmr) +{ + int i; + + for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++) { + int ret; + + ret = tdmr_populate_rsvd_areas(tdmr_entry(tdmr_list, i), + tmb_list, tdmr_list, max_reserved_per_tdmr); + if (ret) + return ret; + } + + return 0; +} + +/* + * Construct a list of TDMRs on the preallocated space in @tdmr_list + * to cover all TDX memory regions in @tmb_list based on the TDX module + * TDMR global information in @tdmr_sysinfo. + */ +static int construct_tdmrs(struct list_head *tmb_list, + struct tdmr_info_list *tdmr_list, + struct tdx_tdmr_sysinfo *tdmr_sysinfo) +{ + int ret; + + ret = fill_out_tdmrs(tmb_list, tdmr_list); + if (ret) + return ret; + + ret = tdmrs_set_up_pamt_all(tdmr_list, tmb_list, + tdmr_sysinfo->pamt_entry_size); + if (ret) + return ret; + + ret = tdmrs_populate_rsvd_areas_all(tdmr_list, tmb_list, + tdmr_sysinfo->max_reserved_per_tdmr); + if (ret) + tdmrs_free_pamt_all(tdmr_list); + + /* + * The tdmr_info_list is read-only from here on out. + * Ensure that these writes are seen by other CPUs. + * Pairs with a smp_rmb() in is_pamt_page(). 
+ */ + smp_wmb(); + + return ret; +} + +static int config_tdx_module(struct tdmr_info_list *tdmr_list, u64 global_keyid) +{ + struct tdx_module_args args = {}; + u64 *tdmr_pa_array; + size_t array_sz; + int i, ret; + + /* + * TDMRs are passed to the TDX module via an array of physical + * addresses of each TDMR. The array itself also has certain + * alignment requirement. + */ + array_sz = tdmr_list->nr_consumed_tdmrs * sizeof(u64); + array_sz = roundup_pow_of_two(array_sz); + if (array_sz < TDMR_INFO_PA_ARRAY_ALIGNMENT) + array_sz = TDMR_INFO_PA_ARRAY_ALIGNMENT; + + tdmr_pa_array = kzalloc(array_sz, GFP_KERNEL); + if (!tdmr_pa_array) + return -ENOMEM; + + for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++) + tdmr_pa_array[i] = __pa(tdmr_entry(tdmr_list, i)); + + args.rcx = __pa(tdmr_pa_array); + args.rdx = tdmr_list->nr_consumed_tdmrs; + args.r8 = global_keyid; + ret = seamcall_prerr(TDH_SYS_CONFIG, &args); + + /* Free the array as it is not required anymore. */ + kfree(tdmr_pa_array); + + return ret; +} + +static int do_global_key_config(void *unused) +{ + struct tdx_module_args args = {}; + + return seamcall_prerr(TDH_SYS_KEY_CONFIG, &args); +} + +/* + * Attempt to configure the global KeyID on all physical packages. + * + * This requires running code on at least one CPU in each package. + * TDMR initialization will fail if any package in the + * system has no online CPUs. + * + * This code takes no affirmative steps to online CPUs. Callers (aka. + * KVM) can ensure success by ensuring sufficient CPUs are online and + * can run SEAMCALLs. + */ +static int config_global_keyid(void) +{ + cpumask_var_t packages; + int cpu, ret = -EINVAL; + + if (!zalloc_cpumask_var(&packages, GFP_KERNEL)) + return -ENOMEM; + + /* + * Hardware doesn't guarantee cache coherency across different + * KeyIDs. The kernel needs to flush PAMT's dirty cachelines + * (associated with KeyID 0) before the TDX module can use the + * global KeyID to access the PAMT.
Given PAMTs are potentially + * large (~1/256th of system RAM), just use WBINVD. + */ + wbinvd_on_all_cpus(); + + for_each_online_cpu(cpu) { + /* + * The key configuration only needs to be done once per + * package and will return an error if configured more + * than once. Avoid doing it multiple times per package. + */ + if (cpumask_test_and_set_cpu(topology_physical_package_id(cpu), + packages)) + continue; + + /* + * TDH.SYS.KEY.CONFIG cannot run concurrently on + * different cpus. Do it one by one. + */ + ret = smp_call_on_cpu(cpu, do_global_key_config, NULL, true); + if (ret) + break; + } + + free_cpumask_var(packages); + return ret; +} + +static int init_tdmr(struct tdmr_info *tdmr) +{ + u64 next; + + /* + * Initializing a TDMR can be time consuming. To avoid long + * SEAMCALLs, the TDX module may only initialize a part of the + * TDMR in each call. + */ + do { + struct tdx_module_args args = { + .rcx = tdmr->base, + }; + int ret; + + ret = seamcall_prerr_ret(TDH_SYS_TDMR_INIT, &args); + if (ret) + return ret; + /* + * RDX contains 'next-to-initialize' address if + * TDH.SYS.TDMR.INIT did not fully complete and + * should be retried. + */ + next = args.rdx; + cond_resched(); + /* Keep making SEAMCALLs until the TDMR is done */ + } while (next < tdmr->base + tdmr->size); + + return 0; +} + +static int init_tdmrs(struct tdmr_info_list *tdmr_list) +{ + int i; + + /* + * This operation is costly. It can be parallelized, + * but keep it simple for now. + */ + for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++) { + int ret; + + ret = init_tdmr(tdmr_entry(tdmr_list, i)); + if (ret) + return ret; + } + + return 0; +} + +static int init_tdx_module(void) +{ + struct tdx_tdmr_sysinfo tdmr_sysinfo; + int ret; + + /* + * To keep things simple, assume that all TDX-protected memory + * will come from the page allocator. Make sure all pages in the + * page allocator are TDX-usable memory. 
+ * + * Build the list of "TDX-usable" memory regions which cover all + * pages in the page allocator to guarantee that. Do it while + * holding mem_hotplug_lock read-lock as the memory hotplug code + * path reads the @tdx_memlist to reject any new memory. + */ + get_online_mems(); + + ret = build_tdx_memlist(&tdx_memlist); + if (ret) + goto out_put_tdxmem; + + ret = get_tdx_tdmr_sysinfo(&tdmr_sysinfo); + if (ret) + goto err_free_tdxmem; + + /* Allocate enough space for constructing TDMRs */ + ret = alloc_tdmr_list(&tdx_tdmr_list, &tdmr_sysinfo); + if (ret) + goto err_free_tdxmem; + + /* Cover all TDX-usable memory regions in TDMRs */ + ret = construct_tdmrs(&tdx_memlist, &tdx_tdmr_list, &tdmr_sysinfo); + if (ret) + goto err_free_tdmrs; + + /* Pass the TDMRs and the global KeyID to the TDX module */ + ret = config_tdx_module(&tdx_tdmr_list, tdx_global_keyid); + if (ret) + goto err_free_pamts; + + /* Config the key of global KeyID on all packages */ + ret = config_global_keyid(); + if (ret) + goto err_reset_pamts; + + /* Initialize TDMRs to complete the TDX module initialization */ + ret = init_tdmrs(&tdx_tdmr_list); + if (ret) + goto err_reset_pamts; + + pr_info("%lu KB allocated for PAMT\n", tdmrs_count_pamt_kb(&tdx_tdmr_list)); + +out_put_tdxmem: + /* + * @tdx_memlist is written here and read at memory hotplug time. + * Lock out memory hotplug code while building it. + */ + put_online_mems(); + return ret; + +err_reset_pamts: + /* + * Part of PAMTs may already have been initialized by the + * TDX module. Flush cache before returning PAMTs back + * to the kernel. + */ + wbinvd_on_all_cpus(); + /* + * According to the TDX hardware spec, if the platform + * doesn't have the "partial write machine check" + * erratum, any kernel read/write will never cause #MC + * in kernel space, thus it's OK to not convert PAMTs + * back to normal. But do the conversion anyway here + * as suggested by the TDX spec. 
+ */ + tdmrs_reset_pamt_all(&tdx_tdmr_list); +err_free_pamts: + tdmrs_free_pamt_all(&tdx_tdmr_list); +err_free_tdmrs: + free_tdmr_list(&tdx_tdmr_list); +err_free_tdxmem: + free_tdx_memlist(&tdx_memlist); + goto out_put_tdxmem; +} + +static int __tdx_enable(void) +{ + int ret; + + ret = init_tdx_module(); + if (ret) { + pr_err("module initialization failed (%d)\n", ret); + tdx_module_status = TDX_MODULE_ERROR; + return ret; + } + + pr_info("module initialized\n"); + tdx_module_status = TDX_MODULE_INITIALIZED; + + return 0; +} + +/** + * tdx_enable - Enable TDX module to make it ready to run TDX guests + * + * This function assumes the caller has: 1) held read lock of CPU hotplug + * lock to prevent any new cpu from becoming online; 2) done both VMXON + * and tdx_cpu_enable() on all online cpus. + * + * This function requires there's at least one online cpu for each CPU + * package to succeed. + * + * This function can be called in parallel by multiple callers. + * + * Return 0 if TDX is enabled successfully, otherwise error. + */ +int tdx_enable(void) +{ + int ret; + + if (!boot_cpu_has(X86_FEATURE_TDX_HOST_PLATFORM)) + return -ENODEV; + + lockdep_assert_cpus_held(); + + mutex_lock(&tdx_module_lock); + + switch (tdx_module_status) { + case TDX_MODULE_UNINITIALIZED: + ret = __tdx_enable(); + break; + case TDX_MODULE_INITIALIZED: + /* Already initialized, great, tell the caller. */ + ret = 0; + break; + default: + /* Failed to initialize in the previous attempts */ + ret = -EINVAL; + break; + } + + mutex_unlock(&tdx_module_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(tdx_enable); + +static bool is_pamt_page(unsigned long phys) +{ + struct tdmr_info_list *tdmr_list = &tdx_tdmr_list; + int i; + + /* Ensure that all remote 'tdmr_list' writes are visible: */ + smp_rmb(); + + /* + * The TDX module is no longer returning TDX_SYS_NOT_READY and + * is initialized. The 'tdmr_list' was initialized long ago + * and is now read-only. 
+ */ + for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++) { + unsigned long base, size; + + tdmr_get_pamt(tdmr_entry(tdmr_list, i), &base, &size); + + if (phys >= base && phys < (base + size)) + return true; + } + + return false; +} + +/* + * Return whether the memory page at the given physical address is TDX + * private memory or not. + * + * This can be imprecise for two known reasons: + * 1. PAMTs are private memory and exist before the TDX module is + * ready and TDH_PHYMEM_PAGE_RDMD works. This is a relatively + * short window that occurs once per boot. + * 2. TDH_PHYMEM_PAGE_RDMD reflects the TDX module's knowledge of the + * page. However, the page can still cause #MC until it has been + * fully converted to shared using 64-byte writes like MOVDIR64B. + * Buggy hosts might still leave #MC-causing memory in place which + * this function can not detect. + */ +static bool paddr_is_tdx_private(unsigned long phys) +{ + struct tdx_module_args args = { + .rcx = phys & PAGE_MASK, + }; + u64 sret; + + if (!boot_cpu_has(X86_FEATURE_TDX_HOST_PLATFORM)) + return false; + + /* Get page type from the TDX module */ + sret = __seamcall_ret(TDH_PHYMEM_PAGE_RDMD, &args); + + /* + * The SEAMCALL will not return success unless there is a + * working, "ready" TDX module. Assume an absence of TDX + * private pages until SEAMCALL is working. + */ + if (sret) + return false; + + /* + * SEAMCALL was successful -- read page type (via RCX): + * + * - PT_NDA: Page is not used by the TDX module + * - PT_RSVD: Reserved for Non-TDX use + * - Others: Page is used by the TDX module + * + * Note PAMT pages are marked as PT_RSVD but they are also TDX + * private memory. + */ + switch (args.rcx) { + case PT_NDA: + return false; + case PT_RSVD: + return is_pamt_page(phys); + default: + return true; + } +} + +/* + * Some TDX-capable CPUs have an erratum. A write to TDX private + * memory poisons that memory, and a subsequent read of that memory + * triggers #MC. 
+ * + * Help distinguish erratum-triggered #MCs from a normal hardware one. + * Just print additional message to show such #MC may be result of the + * erratum. + */ +const char *tdx_dump_mce_info(struct mce *m) +{ + if (!m || !mce_is_memory_error(m) || !mce_usable_address(m)) + return NULL; + + if (!paddr_is_tdx_private(m->addr)) + return NULL; + + return "TDX private memory error. Possible kernel bug."; +} + +static __init int record_keyid_partitioning(u32 *tdx_keyid_start, + u32 *nr_tdx_keyids) +{ + u32 _nr_mktme_keyids, _tdx_keyid_start, _nr_tdx_keyids; + int ret; + + /* + * IA32_MKTME_KEYID_PARTITIONING: + * Bit [31:0]: Number of MKTME KeyIDs. + * Bit [63:32]: Number of TDX private KeyIDs. + */ + ret = rdmsr_safe(MSR_IA32_MKTME_KEYID_PARTITIONING, &_nr_mktme_keyids, + &_nr_tdx_keyids); + if (ret || !_nr_tdx_keyids) + return -EINVAL; + + /* TDX KeyIDs start after the last MKTME KeyID. */ + _tdx_keyid_start = _nr_mktme_keyids + 1; + + *tdx_keyid_start = _tdx_keyid_start; + *nr_tdx_keyids = _nr_tdx_keyids; + + return 0; +} + +static bool is_tdx_memory(unsigned long start_pfn, unsigned long end_pfn) +{ + struct tdx_memblock *tmb; + + /* + * This check assumes that the start_pfn<->end_pfn range does not + * cross multiple @tdx_memlist entries. A single memory online + * event across multiple memblocks (from which @tdx_memlist + * entries are derived at the time of module initialization) is + * not possible. This is because memory offline/online is done + * on granularity of 'struct memory_block', and the hotpluggable + * memory region (one memblock) must be multiple of memory_block. + */ + list_for_each_entry(tmb, &tdx_memlist, list) { + if (start_pfn >= tmb->start_pfn && end_pfn <= tmb->end_pfn) + return true; + } + return false; +} + +static int tdx_memory_notifier(struct notifier_block *nb, unsigned long action, + void *v) +{ + struct memory_notify *mn = v; + + if (action != MEM_GOING_ONLINE) + return NOTIFY_OK; + + /* + * Empty list means TDX isn't enabled.
Allow any memory + * to go online. + */ + if (list_empty(&tdx_memlist)) + return NOTIFY_OK; + + /* + * The TDX memory configuration is static and can not be + * changed. Reject onlining any memory which is outside of + * the static configuration whether it supports TDX or not. + */ + if (is_tdx_memory(mn->start_pfn, mn->start_pfn + mn->nr_pages)) + return NOTIFY_OK; + + return NOTIFY_BAD; +} + +static struct notifier_block tdx_memory_nb = { + .notifier_call = tdx_memory_notifier, +}; + +static void __init check_tdx_erratum(void) +{ + /* + * These CPUs have an erratum. A partial write from non-TD + * software (e.g. via MOVNTI variants or UC/WC mapping) to TDX + * private memory poisons that memory, and a subsequent read of + * that memory triggers #MC. + */ + switch (boot_cpu_data.x86_model) { + case INTEL_FAM6_SAPPHIRERAPIDS_X: + case INTEL_FAM6_EMERALDRAPIDS_X: + setup_force_cpu_bug(X86_BUG_TDX_PW_MCE); + } +} + +void __init tdx_init(void) +{ + u32 tdx_keyid_start, nr_tdx_keyids; + int err; + + err = record_keyid_partitioning(&tdx_keyid_start, &nr_tdx_keyids); + if (err) + return; + + pr_info("BIOS enabled: private KeyID range [%u, %u)\n", + tdx_keyid_start, tdx_keyid_start + nr_tdx_keyids); + + /* + * The TDX module itself requires one 'global KeyID' to protect + * its metadata. If there's only one TDX KeyID, there won't be + * any left for TDX guests thus there's no point to enable TDX + * at all. + */ + if (nr_tdx_keyids < 2) { + pr_err("initialization failed: too few private KeyIDs available.\n"); + return; + } + + /* + * At this point, hibernation_available() indicates whether or + * not hibernation support has been permanently disabled. 
+ */ + if (hibernation_available()) { + pr_err("initialization failed: Hibernation support is enabled\n"); + return; + } + + err = register_memory_notifier(&tdx_memory_nb); + if (err) { + pr_err("initialization failed: register_memory_notifier() failed (%d)\n", + err); + return; + } + +#if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND) + pr_info("Disable ACPI S3. Turn off TDX in the BIOS to use ACPI S3.\n"); + acpi_suspend_lowlevel = NULL; +#endif + + /* + * Just use the first TDX KeyID as the 'global KeyID' and + * leave the rest for TDX guests. + */ + tdx_global_keyid = tdx_keyid_start; + tdx_guest_keyid_start = tdx_keyid_start + 1; + tdx_nr_guest_keyids = nr_tdx_keyids - 1; + + setup_force_cpu_cap(X86_FEATURE_TDX_HOST_PLATFORM); + + check_tdx_erratum(); +} diff --git a/arch/x86/virt/vmx/tdx/tdx.h b/arch/x86/virt/vmx/tdx/tdx.h new file mode 100644 index 0000000000000000000000000000000000000000..b701f69485d32336e973aeb073f59735ef81cff6 --- /dev/null +++ b/arch/x86/virt/vmx/tdx/tdx.h @@ -0,0 +1,121 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _X86_VIRT_TDX_H +#define _X86_VIRT_TDX_H + +#include + +/* + * This file contains both macros and data structures defined by the TDX + * architecture and Linux defined software data structures and functions. + * The two should not be mixed together for better readability. The + * architectural definitions come first. + */ + +/* + * TDX module SEAMCALL leaf functions + */ +#define TDH_PHYMEM_PAGE_RDMD 24 +#define TDH_SYS_KEY_CONFIG 31 +#define TDH_SYS_INIT 33 +#define TDH_SYS_RD 34 +#define TDH_SYS_LP_INIT 35 +#define TDH_SYS_TDMR_INIT 36 +#define TDH_SYS_CONFIG 45 + +/* TDX page types */ +#define PT_NDA 0x0 +#define PT_RSVD 0x1 + +/* + * Global scope metadata field ID. + * + * See Table "Global Scope Metadata", TDX module 1.5 ABI spec. 
+ */ +#define MD_FIELD_ID_MAX_TDMRS 0x9100000100000008ULL +#define MD_FIELD_ID_MAX_RESERVED_PER_TDMR 0x9100000100000009ULL +#define MD_FIELD_ID_PAMT_4K_ENTRY_SIZE 0x9100000100000010ULL +#define MD_FIELD_ID_PAMT_2M_ENTRY_SIZE 0x9100000100000011ULL +#define MD_FIELD_ID_PAMT_1G_ENTRY_SIZE 0x9100000100000012ULL + +/* + * Sub-field definition of metadata field ID. + * + * See Table "MD_FIELD_ID (Metadata Field Identifier / Sequence Header) + * Definition", TDX module 1.5 ABI spec. + * + * - Bit 33:32: ELEMENT_SIZE_CODE -- size of a single element of metadata + * + * 0: 8 bits + * 1: 16 bits + * 2: 32 bits + * 3: 64 bits + */ +#define MD_FIELD_ID_ELE_SIZE_CODE(_field_id) \ + (((_field_id) & GENMASK_ULL(33, 32)) >> 32) + +#define MD_FIELD_ID_ELE_SIZE_16BIT 1 + +struct tdmr_reserved_area { + u64 offset; + u64 size; +} __packed; + +#define TDMR_INFO_ALIGNMENT 512 +#define TDMR_INFO_PA_ARRAY_ALIGNMENT 512 + +struct tdmr_info { + u64 base; + u64 size; + u64 pamt_1g_base; + u64 pamt_1g_size; + u64 pamt_2m_base; + u64 pamt_2m_size; + u64 pamt_4k_base; + u64 pamt_4k_size; + /* + * The actual number of reserved areas depends on the value of + * field MD_FIELD_ID_MAX_RESERVED_PER_TDMR in the TDX module + * global metadata. + */ + DECLARE_FLEX_ARRAY(struct tdmr_reserved_area, reserved_areas); +} __packed __aligned(TDMR_INFO_ALIGNMENT); + +/* + * Do not put any hardware-defined TDX structure representations below + * this comment! + */ + +/* Kernel defined TDX module status during module initialization. 
*/ +enum tdx_module_status_t { + TDX_MODULE_UNINITIALIZED, + TDX_MODULE_INITIALIZED, + TDX_MODULE_ERROR +}; + +struct tdx_memblock { + struct list_head list; + unsigned long start_pfn; + unsigned long end_pfn; + int nid; +}; + +/* "TDMR info" part of "Global Scope Metadata" for constructing TDMRs */ +struct tdx_tdmr_sysinfo { + u16 max_tdmrs; + u16 max_reserved_per_tdmr; + u16 pamt_entry_size[TDX_PS_NR]; +}; + +/* Warn if kernel has less than TDMR_NR_WARN TDMRs after allocation */ +#define TDMR_NR_WARN 4 + +struct tdmr_info_list { + void *tdmrs; /* Flexible array to hold 'tdmr_info's */ + int nr_consumed_tdmrs; /* How many 'tdmr_info's are in use */ + + /* Metadata for finding target 'tdmr_info' and freeing @tdmrs */ + int tdmr_sz; /* Size of one 'tdmr_info' */ + int max_tdmrs; /* How many 'tdmr_info's are allocated */ +}; + +#endif diff --git a/arch/xtensa/include/asm/cacheflush.h b/arch/xtensa/include/asm/cacheflush.h index 785a00ce83c11e8bbfa8e02b131315606060c35c..38bcecb0e457d9741c142cada4d38ec65ff0f88b 100644 --- a/arch/xtensa/include/asm/cacheflush.h +++ b/arch/xtensa/include/asm/cacheflush.h @@ -116,8 +116,9 @@ void flush_cache_page(struct vm_area_struct*, #define flush_cache_mm(mm) flush_cache_all() #define flush_cache_dup_mm(mm) flush_cache_mm(mm) -#define flush_cache_vmap(start,end) flush_cache_all() -#define flush_cache_vunmap(start,end) flush_cache_all() +#define flush_cache_vmap(start,end) flush_cache_all() +#define flush_cache_vmap_early(start,end) do { } while (0) +#define flush_cache_vunmap(start,end) flush_cache_all() void flush_dcache_folio(struct folio *folio); #define flush_dcache_folio flush_dcache_folio @@ -140,6 +141,7 @@ void local_flush_cache_page(struct vm_area_struct *vma, #define flush_cache_dup_mm(mm) do { } while (0) #define flush_cache_vmap(start,end) do { } while (0) +#define flush_cache_vmap_early(start,end) do { } while (0) #define flush_cache_vunmap(start,end) do { } while (0) #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 diff --git 
a/block/bio-integrity.c b/block/bio-integrity.c index feef615e2c9c8935e2cd11a4b1e09f115171d328..c9a16fba58b9c47f5424be9a8c7c6681d176b986 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -336,7 +336,7 @@ int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t bytes, if (nr_vecs > BIO_MAX_VECS) return -E2BIG; if (nr_vecs > UIO_FASTIOV) { - bvec = kcalloc(sizeof(*bvec), nr_vecs, GFP_KERNEL); + bvec = kcalloc(nr_vecs, sizeof(*bvec), GFP_KERNEL); if (!bvec) return -ENOMEM; pages = NULL; diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index e303fd31731377cae25738cd82beea6b96a4d1b9..ff93c385ba5afb6920b53fdbcf96bd5d3970d17a 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -300,7 +300,7 @@ static inline struct blkcg *blkcg_parent(struct blkcg *blkcg) * @disk: gendisk the new blkg is associated with * @gfp_mask: allocation mask to use * - * Allocate a new blkg assocating @blkcg and @q. + * Allocate a new blkg associating @blkcg and @disk. */ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk, gfp_t gfp_mask) diff --git a/block/blk-iocost.c b/block/blk-iocost.c index 089fcb9cfce37011f4cf6ec1f86ba36853fed381..c8beec6d7df0863bb4811c12f5ff576e7a5121c7 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -1261,7 +1261,7 @@ static void weight_updated(struct ioc_gq *iocg, struct ioc_now *now) static bool iocg_activate(struct ioc_gq *iocg, struct ioc_now *now) { struct ioc *ioc = iocg->ioc; - u64 last_period, cur_period; + u64 __maybe_unused last_period, cur_period; u64 vtime, vtarget; int i; diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 5cbeb9344f2f5cea3121197892a412deab777838..94668e72ab09bf0922c8d79846a87d91fdbeb1ba 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -479,23 +479,6 @@ out: return res; } -static int hctx_run_show(void *data, struct seq_file *m) -{ - struct blk_mq_hw_ctx *hctx = data; - - seq_printf(m, "%lu\n", hctx->run); - return 0; -} - -static 
ssize_t hctx_run_write(void *data, const char __user *buf, size_t count, - loff_t *ppos) -{ - struct blk_mq_hw_ctx *hctx = data; - - hctx->run = 0; - return count; -} - static int hctx_active_show(void *data, struct seq_file *m) { struct blk_mq_hw_ctx *hctx = data; @@ -624,7 +607,6 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = { {"tags_bitmap", 0400, hctx_tags_bitmap_show}, {"sched_tags", 0400, hctx_sched_tags_show}, {"sched_tags_bitmap", 0400, hctx_sched_tags_bitmap_show}, - {"run", 0600, hctx_run_show, hctx_run_write}, {"active", 0400, hctx_active_show}, {"dispatch_busy", 0400, hctx_dispatch_busy_show}, {"type", 0400, hctx_type_show}, diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 67c95f31b15bb1e16c022efe644dcb0900ef12f6..451a2c1f1f32186989160ed6e77e87cb8d14f4f1 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -324,8 +324,6 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx) if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q))) return; - hctx->run++; - /* * A return of -EAGAIN is an indication that hctx->dispatch is not * empty and we must run again in order to avoid starving flushes. diff --git a/block/blk-mq.c b/block/blk-mq.c index c11c97afa0bc1de400a68cff21e48d7f420e6bab..aa87fcfda1ecfc875c86a0258fe16e707ce3f167 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -772,11 +772,16 @@ static void req_bio_endio(struct request *rq, struct bio *bio, /* * Partial zone append completions cannot be supported as the * BIO fragments may end up not being written sequentially. + * For such case, force the completed nbytes to be equal to + * the BIO size so that bio_advance() sets the BIO remaining + * size to 0 and we end up calling bio_endio() before returning. 
*/ - if (bio->bi_iter.bi_size != nbytes) + if (bio->bi_iter.bi_size != nbytes) { bio->bi_status = BLK_STS_IOERR; - else + nbytes = bio->bi_iter.bi_size; + } else { bio->bi_iter.bi_sector = rq->__sector; + } } bio_advance(bio, nbytes); @@ -1859,6 +1864,22 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx, wait->flags &= ~WQ_FLAG_EXCLUSIVE; __add_wait_queue(wq, wait); + /* + * Add one explicit barrier since blk_mq_get_driver_tag() may + * not imply barrier in case of failure. + * + * Order adding us to wait queue and allocating driver tag. + * + * The pair is the one implied in sbitmap_queue_wake_up() which + * orders clearing sbitmap tag bits and waitqueue_active() in + * __sbitmap_queue_wake_up(), since waitqueue_active() is lockless + * + * Otherwise, re-order of adding wait queue and getting driver tag + * may cause __sbitmap_queue_wake_up() to wake up nothing because + * the waitqueue_active() may not observe us in wait queue. + */ + smp_mb(); + /* * It's possible that a tag was freed in the window between the * allocation failure and adding the hardware queue to the wait @@ -2891,8 +2912,11 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q, return NULL; } -/* return true if this @rq can be used for @bio */ -static bool blk_mq_can_use_cached_rq(struct request *rq, struct blk_plug *plug, +/* + * Check if we can use the passed on request for submitting the passed in bio, + * and remove it from the request list if it can be used. 
+ */ +static bool blk_mq_use_cached_rq(struct request *rq, struct blk_plug *plug, struct bio *bio) { enum hctx_type type = blk_mq_get_hctx_type(bio->bi_opf); @@ -2952,12 +2976,6 @@ void blk_mq_submit_bio(struct bio *bio) blk_status_t ret; bio = blk_queue_bounce(bio, q); - if (bio_may_exceed_limits(bio, &q->limits)) { - bio = __bio_split_to_limits(bio, &q->limits, &nr_segs); - if (!bio) - return; - } - bio_set_ioprio(bio); if (plug) { @@ -2966,16 +2984,26 @@ void blk_mq_submit_bio(struct bio *bio) rq = NULL; } if (rq) { + if (unlikely(bio_may_exceed_limits(bio, &q->limits))) { + bio = __bio_split_to_limits(bio, &q->limits, &nr_segs); + if (!bio) + return; + } if (!bio_integrity_prep(bio)) return; if (blk_mq_attempt_bio_merge(q, bio, nr_segs)) return; - if (blk_mq_can_use_cached_rq(rq, plug, bio)) + if (blk_mq_use_cached_rq(rq, plug, bio)) goto done; percpu_ref_get(&q->q_usage_counter); } else { if (unlikely(bio_queue_enter(bio))) return; + if (unlikely(bio_may_exceed_limits(bio, &q->limits))) { + bio = __bio_split_to_limits(bio, &q->limits, &nr_segs); + if (!bio) + goto fail; + } if (!bio_integrity_prep(bio)) goto fail; } diff --git a/block/ioprio.c b/block/ioprio.c index b5a942519a797ceab4729231dd7c673e50ab0613..73301a261429ff9e04a166aefa5c72df55eaee57 100644 --- a/block/ioprio.c +++ b/block/ioprio.c @@ -139,32 +139,6 @@ out: return ret; } -/* - * If the task has set an I/O priority, use that. Otherwise, return - * the default I/O priority. - * - * Expected to be called for current task or with task_lock() held to keep - * io_context stable. 
- */ -int __get_task_ioprio(struct task_struct *p) -{ - struct io_context *ioc = p->io_context; - int prio; - - if (p != current) - lockdep_assert_held(&p->alloc_lock); - if (ioc) - prio = ioc->ioprio; - else - prio = IOPRIO_DEFAULT; - - if (IOPRIO_PRIO_CLASS(prio) == IOPRIO_CLASS_NONE) - prio = IOPRIO_PRIO_VALUE(task_nice_ioclass(p), - task_nice_ioprio(p)); - return prio; -} -EXPORT_SYMBOL_GPL(__get_task_ioprio); - static int get_task_ioprio(struct task_struct *p) { int ret; diff --git a/block/partitions/core.c b/block/partitions/core.c index e6ac73617f3e12db18d7bc9f4ade561494739cd6..cab0d76a828e37eb90e38d91b4e92a61e703717e 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -562,8 +562,8 @@ static bool blk_add_partition(struct gendisk *disk, part = add_partition(disk, p, from, size, state->parts[p].flags, &state->parts[p].info); if (IS_ERR(part) && PTR_ERR(part) != -ENXIO) { - printk(KERN_ERR " %s: p%d could not be added: %ld\n", - disk->disk_name, p, -PTR_ERR(part)); + printk(KERN_ERR " %s: p%d could not be added: %pe\n", + disk->disk_name, p, part); return true; } diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index ab2a82cb1b0b48ab21682bdb87c052707f19d282..7b7c605166e0c1c7d2a4c9e1f1bce1f05799d4f6 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -673,6 +674,78 @@ static void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata, schedule_work(&entry->work); } +/* + * Only a single callback can be registered for CXL CPER events. 
+ */ +static DECLARE_RWSEM(cxl_cper_rw_sem); +static cxl_cper_callback cper_callback; + +/* CXL Event record UUIDs are formatted as GUIDs and reported in section type */ + +/* + * General Media Event Record + * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43 + */ +#define CPER_SEC_CXL_GEN_MEDIA_GUID \ + GUID_INIT(0xfbcd0a77, 0xc260, 0x417f, \ + 0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6) + +/* + * DRAM Event Record + * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44 + */ +#define CPER_SEC_CXL_DRAM_GUID \ + GUID_INIT(0x601dcbb3, 0x9c06, 0x4eab, \ + 0xb8, 0xaf, 0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24) + +/* + * Memory Module Event Record + * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45 + */ +#define CPER_SEC_CXL_MEM_MODULE_GUID \ + GUID_INIT(0xfe927475, 0xdd59, 0x4339, \ + 0xa5, 0x86, 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74) + +static void cxl_cper_post_event(enum cxl_event_type event_type, + struct cxl_cper_event_rec *rec) +{ + if (rec->hdr.length <= sizeof(rec->hdr) || + rec->hdr.length > sizeof(*rec)) { + pr_err(FW_WARN "CXL CPER Invalid section length (%u)\n", + rec->hdr.length); + return; + } + + if (!(rec->hdr.validation_bits & CPER_CXL_COMP_EVENT_LOG_VALID)) { + pr_err(FW_WARN "CXL CPER invalid event\n"); + return; + } + + guard(rwsem_read)(&cxl_cper_rw_sem); + if (cper_callback) + cper_callback(event_type, rec); +} + +int cxl_cper_register_callback(cxl_cper_callback callback) +{ + guard(rwsem_write)(&cxl_cper_rw_sem); + if (cper_callback) + return -EINVAL; + cper_callback = callback; + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_cper_register_callback, CXL); + +int cxl_cper_unregister_callback(cxl_cper_callback callback) +{ + guard(rwsem_write)(&cxl_cper_rw_sem); + if (callback != cper_callback) + return -EINVAL; + cper_callback = NULL; + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_cper_unregister_callback, CXL); + static bool ghes_do_proc(struct ghes *ghes, const struct acpi_hest_generic_status *estatus) { @@ -707,6 +780,22 @@ static bool ghes_do_proc(struct ghes *ghes, } else if 
(guid_equal(sec_type, &CPER_SEC_PROC_ARM)) { queued = ghes_handle_arm_hw_error(gdata, sev, sync); + } else if (guid_equal(sec_type, &CPER_SEC_CXL_GEN_MEDIA_GUID)) { + struct cxl_cper_event_rec *rec = + acpi_hest_get_payload(gdata); + + cxl_cper_post_event(CXL_CPER_EVENT_GEN_MEDIA, rec); + } else if (guid_equal(sec_type, &CPER_SEC_CXL_DRAM_GUID)) { + struct cxl_cper_event_rec *rec = + acpi_hest_get_payload(gdata); + + cxl_cper_post_event(CXL_CPER_EVENT_DRAM, rec); + } else if (guid_equal(sec_type, + &CPER_SEC_CXL_MEM_MODULE_GUID)) { + struct cxl_cper_event_rec *rec = + acpi_hest_get_payload(gdata); + + cxl_cper_post_event(CXL_CPER_EVENT_MEM_MODULE, rec); } else { void *err = acpi_hest_get_payload(gdata); diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c index 9ef5f1bdcfdbcf5d5f09827f4e5fae7e56f6e10b..d6b85f0f6082f72421168b26fec4b2fff09b4e13 100644 --- a/drivers/acpi/numa/hmat.c +++ b/drivers/acpi/numa/hmat.c @@ -58,14 +58,22 @@ struct target_cache { struct node_cache_attrs cache_attrs; }; +enum { + NODE_ACCESS_CLASS_0 = 0, + NODE_ACCESS_CLASS_1, + NODE_ACCESS_CLASS_GENPORT_SINK, + NODE_ACCESS_CLASS_MAX, +}; + struct memory_target { struct list_head node; unsigned int memory_pxm; unsigned int processor_pxm; struct resource memregions; - struct node_hmem_attrs hmem_attrs[2]; + struct access_coordinate coord[NODE_ACCESS_CLASS_MAX]; struct list_head caches; struct node_cache_attrs cache_attrs; + u8 gen_port_device_handle[ACPI_SRAT_DEVICE_HANDLE_SIZE]; bool registered; }; @@ -100,6 +108,47 @@ static struct memory_target *find_mem_target(unsigned int mem_pxm) return NULL; } +static struct memory_target *acpi_find_genport_target(u32 uid) +{ + struct memory_target *target; + u32 target_uid; + u8 *uid_ptr; + + list_for_each_entry(target, &targets, node) { + uid_ptr = target->gen_port_device_handle + 8; + target_uid = *(u32 *)uid_ptr; + if (uid == target_uid) + return target; + } + + return NULL; +} + +/** + * acpi_get_genport_coordinates - Retrieve the access 
coordinates for a generic port + * @uid: ACPI unique id + * @coord: The access coordinates written back out for the generic port + * + * Return: 0 on success. Errno on failure. + * + * Only supports device handles that are ACPI. Assume ACPI0016 HID for CXL. + */ +int acpi_get_genport_coordinates(u32 uid, + struct access_coordinate *coord) +{ + struct memory_target *target; + + guard(mutex)(&target_lock); + target = acpi_find_genport_target(uid); + if (!target) + return -ENOENT; + + *coord = target->coord[NODE_ACCESS_CLASS_GENPORT_SINK]; + + return 0; +} +EXPORT_SYMBOL_NS_GPL(acpi_get_genport_coordinates, CXL); + static __init void alloc_memory_initiator(unsigned int cpu_pxm) { struct memory_initiator *initiator; @@ -120,8 +169,7 @@ static __init void alloc_memory_initiator(unsigned int cpu_pxm) list_add_tail(&initiator->node, &initiators); } -static __init void alloc_memory_target(unsigned int mem_pxm, - resource_size_t start, resource_size_t len) +static __init struct memory_target *alloc_target(unsigned int mem_pxm) { struct memory_target *target; @@ -129,7 +177,7 @@ static __init void alloc_memory_target(unsigned int mem_pxm, if (!target) { target = kzalloc(sizeof(*target), GFP_KERNEL); if (!target) - return; + return NULL; target->memory_pxm = mem_pxm; target->processor_pxm = PXM_INVAL; target->memregions = (struct resource) { @@ -142,6 +190,19 @@ static __init void alloc_memory_target(unsigned int mem_pxm, INIT_LIST_HEAD(&target->caches); } + return target; +} + +static __init void alloc_memory_target(unsigned int mem_pxm, + resource_size_t start, + resource_size_t len) +{ + struct memory_target *target; + + target = alloc_target(mem_pxm); + if (!target) + return; + /* * There are potentially multiple ranges per PXM, so record each * in the per-target memregions resource tree. 
@@ -152,6 +213,18 @@ static __init void alloc_memory_target(unsigned int mem_pxm, start, start + len, mem_pxm); } +static __init void alloc_genport_target(unsigned int mem_pxm, u8 *handle) +{ + struct memory_target *target; + + target = alloc_target(mem_pxm); + if (!target) + return; + + memcpy(target->gen_port_device_handle, handle, + ACPI_SRAT_DEVICE_HANDLE_SIZE); +} + static __init const char *hmat_data_type(u8 type) { switch (type) { @@ -228,24 +301,24 @@ static void hmat_update_target_access(struct memory_target *target, { switch (type) { case ACPI_HMAT_ACCESS_LATENCY: - target->hmem_attrs[access].read_latency = value; - target->hmem_attrs[access].write_latency = value; + target->coord[access].read_latency = value; + target->coord[access].write_latency = value; break; case ACPI_HMAT_READ_LATENCY: - target->hmem_attrs[access].read_latency = value; + target->coord[access].read_latency = value; break; case ACPI_HMAT_WRITE_LATENCY: - target->hmem_attrs[access].write_latency = value; + target->coord[access].write_latency = value; break; case ACPI_HMAT_ACCESS_BANDWIDTH: - target->hmem_attrs[access].read_bandwidth = value; - target->hmem_attrs[access].write_bandwidth = value; + target->coord[access].read_bandwidth = value; + target->coord[access].write_bandwidth = value; break; case ACPI_HMAT_READ_BANDWIDTH: - target->hmem_attrs[access].read_bandwidth = value; + target->coord[access].read_bandwidth = value; break; case ACPI_HMAT_WRITE_BANDWIDTH: - target->hmem_attrs[access].write_bandwidth = value; + target->coord[access].write_bandwidth = value; break; default: break; @@ -291,11 +364,28 @@ static __init void hmat_add_locality(struct acpi_hmat_locality *hmat_loc) } } +static __init void hmat_update_target(unsigned int tgt_pxm, unsigned int init_pxm, + u8 mem_hier, u8 type, u32 value) +{ + struct memory_target *target = find_mem_target(tgt_pxm); + + if (mem_hier != ACPI_HMAT_MEMORY) + return; + + if (target && target->processor_pxm == init_pxm) { + 
hmat_update_target_access(target, type, value, + NODE_ACCESS_CLASS_0); + /* If the node has a CPU, update access 1 */ + if (node_state(pxm_to_node(init_pxm), N_CPU)) + hmat_update_target_access(target, type, value, + NODE_ACCESS_CLASS_1); + } +} + static __init int hmat_parse_locality(union acpi_subtable_headers *header, const unsigned long end) { struct acpi_hmat_locality *hmat_loc = (void *)header; - struct memory_target *target; unsigned int init, targ, total_size, ipds, tpds; u32 *inits, *targs, value; u16 *entries; @@ -336,15 +426,8 @@ static __init int hmat_parse_locality(union acpi_subtable_headers *header, inits[init], targs[targ], value, hmat_data_type_suffix(type)); - if (mem_hier == ACPI_HMAT_MEMORY) { - target = find_mem_target(targs[targ]); - if (target && target->processor_pxm == inits[init]) { - hmat_update_target_access(target, type, value, 0); - /* If the node has a CPU, update access 1 */ - if (node_state(pxm_to_node(inits[init]), N_CPU)) - hmat_update_target_access(target, type, value, 1); - } - } + hmat_update_target(targs[targ], inits[init], + mem_hier, type, value); } } @@ -491,6 +574,27 @@ static __init int srat_parse_mem_affinity(union acpi_subtable_headers *header, return 0; } +static __init int srat_parse_genport_affinity(union acpi_subtable_headers *header, + const unsigned long end) +{ + struct acpi_srat_generic_affinity *ga = (void *)header; + + if (!ga) + return -EINVAL; + + if (!(ga->flags & ACPI_SRAT_GENERIC_AFFINITY_ENABLED)) + return 0; + + /* Skip PCI device_handle for now */ + if (ga->device_handle_type != 0) + return 0; + + alloc_genport_target(ga->proximity_domain, + (u8 *)ga->device_handle); + + return 0; +} + static u32 hmat_initiator_perf(struct memory_target *target, struct memory_initiator *initiator, struct acpi_hmat_locality *hmat_loc) @@ -592,6 +696,11 @@ static void hmat_update_target_attrs(struct memory_target *target, u32 best = 0; int i; + /* Don't update for generic port if there's no device handle */ + if (access 
== NODE_ACCESS_CLASS_GENPORT_SINK && + !(*(u16 *)target->gen_port_device_handle)) + return; + bitmap_zero(p_nodes, MAX_NUMNODES); /* * If the Address Range Structure provides a local processor pxm, set @@ -661,6 +770,14 @@ static void __hmat_register_target_initiators(struct memory_target *target, } } +static void hmat_register_generic_target_initiators(struct memory_target *target) +{ + static DECLARE_BITMAP(p_nodes, MAX_NUMNODES); + + __hmat_register_target_initiators(target, p_nodes, + NODE_ACCESS_CLASS_GENPORT_SINK); +} + static void hmat_register_target_initiators(struct memory_target *target) { static DECLARE_BITMAP(p_nodes, MAX_NUMNODES); @@ -681,7 +798,7 @@ static void hmat_register_target_cache(struct memory_target *target) static void hmat_register_target_perf(struct memory_target *target, int access) { unsigned mem_nid = pxm_to_node(target->memory_pxm); - node_set_perf_attrs(mem_nid, &target->hmem_attrs[access], access); + node_set_perf_attrs(mem_nid, &target->coord[access], access); } static void hmat_register_target_devices(struct memory_target *target) @@ -712,6 +829,17 @@ static void hmat_register_target(struct memory_target *target) */ hmat_register_target_devices(target); + /* + * Register generic port perf numbers. The nid may not be + * initialized and is still NUMA_NO_NODE. + */ + mutex_lock(&target_lock); + if (*(u16 *)target->gen_port_device_handle) { + hmat_register_generic_target_initiators(target); + target->registered = true; + } + mutex_unlock(&target_lock); + /* * Skip offline nodes. 
This can happen when memory * marked EFI_MEMORY_SP, "specific purpose", is applied @@ -726,8 +854,8 @@ static void hmat_register_target(struct memory_target *target) if (!target->registered) { hmat_register_target_initiators(target); hmat_register_target_cache(target); - hmat_register_target_perf(target, 0); - hmat_register_target_perf(target, 1); + hmat_register_target_perf(target, NODE_ACCESS_CLASS_0); + hmat_register_target_perf(target, NODE_ACCESS_CLASS_1); target->registered = true; } mutex_unlock(&target_lock); @@ -765,7 +893,7 @@ static int hmat_set_default_dram_perf(void) int rc; int nid, pxm; struct memory_target *target; - struct node_hmem_attrs *attrs; + struct access_coordinate *attrs; if (!default_dram_type) return -EIO; @@ -775,7 +903,7 @@ static int hmat_set_default_dram_perf(void) target = find_mem_target(pxm); if (!target) continue; - attrs = &target->hmem_attrs[1]; + attrs = &target->coord[1]; rc = mt_set_default_dram_perf(nid, attrs, "ACPI HMAT"); if (rc) return rc; @@ -789,7 +917,7 @@ static int hmat_calculate_adistance(struct notifier_block *self, { static DECLARE_BITMAP(p_nodes, MAX_NUMNODES); struct memory_target *target; - struct node_hmem_attrs *perf; + struct access_coordinate *perf; int *adist = data; int pxm; @@ -802,7 +930,7 @@ static int hmat_calculate_adistance(struct notifier_block *self, hmat_update_target_attrs(target, p_nodes, 1); mutex_unlock(&target_lock); - perf = &target->hmem_attrs[1]; + perf = &target->coord[1]; if (mt_perf_to_adistance(perf, adist)) return NOTIFY_OK; @@ -870,6 +998,13 @@ static __init int hmat_init(void) ACPI_SRAT_TYPE_MEMORY_AFFINITY, srat_parse_mem_affinity, 0) < 0) goto out_put; + + if (acpi_table_parse_entries(ACPI_SIG_SRAT, + sizeof(struct acpi_table_srat), + ACPI_SRAT_TYPE_GENERIC_PORT_AFFINITY, + srat_parse_genport_affinity, 0) < 0) + goto out_put; + acpi_put_table(tbl); status = acpi_get_table(ACPI_SIG_HMAT, 0, &tbl); diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 
0ba008773b00079cad8df514e680ce35bf9960f1..e6ed1ba91e5c9152c7eeec242c302f91fda42d5d 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1561,8 +1561,7 @@ static inline const struct iommu_ops *acpi_iommu_fwspec_ops(struct device *dev) return fwspec ? fwspec->ops : NULL; } -static const struct iommu_ops *acpi_iommu_configure_id(struct device *dev, - const u32 *id_in) +static int acpi_iommu_configure_id(struct device *dev, const u32 *id_in) { int err; const struct iommu_ops *ops; @@ -1576,7 +1575,7 @@ static const struct iommu_ops *acpi_iommu_configure_id(struct device *dev, ops = acpi_iommu_fwspec_ops(dev); if (ops) { mutex_unlock(&iommu_probe_device_lock); - return ops; + return 0; } err = iort_iommu_configure_id(dev, id_in); @@ -1593,12 +1592,14 @@ static const struct iommu_ops *acpi_iommu_configure_id(struct device *dev, /* Ignore all other errors apart from EPROBE_DEFER */ if (err == -EPROBE_DEFER) { - return ERR_PTR(err); + return err; } else if (err) { dev_dbg(dev, "Adding to IOMMU failed: %d\n", err); - return NULL; + return -ENODEV; } - return acpi_iommu_fwspec_ops(dev); + if (!acpi_iommu_fwspec_ops(dev)) + return -ENODEV; + return 0; } #else /* !CONFIG_IOMMU_API */ @@ -1610,10 +1611,9 @@ int acpi_iommu_fwspec_init(struct device *dev, u32 id, return -ENODEV; } -static const struct iommu_ops *acpi_iommu_configure_id(struct device *dev, - const u32 *id_in) +static int acpi_iommu_configure_id(struct device *dev, const u32 *id_in) { - return NULL; + return -ENODEV; } #endif /* !CONFIG_IOMMU_API */ @@ -1627,7 +1627,7 @@ static const struct iommu_ops *acpi_iommu_configure_id(struct device *dev, int acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr, const u32 *input_id) { - const struct iommu_ops *iommu; + int ret; if (attr == DEV_DMA_NOT_SUPPORTED) { set_dma_ops(dev, &dma_dummy_ops); @@ -1636,12 +1636,16 @@ int acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr, acpi_arch_dma_setup(dev); - iommu = acpi_iommu_configure_id(dev, 
input_id); - if (PTR_ERR(iommu) == -EPROBE_DEFER) + ret = acpi_iommu_configure_id(dev, input_id); + if (ret == -EPROBE_DEFER) return -EPROBE_DEFER; - arch_setup_dma_ops(dev, 0, U64_MAX, - iommu, attr == DEV_DMA_COHERENT); + /* + * Historically this routine doesn't fail driver probing due to errors + * in acpi_iommu_configure_id() + */ + + arch_setup_dma_ops(dev, 0, U64_MAX, attr == DEV_DMA_COHERENT); return 0; } diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c index c1516337f6682840bdee196e50c1de0cc2472a12..b07f7d091d133c6ade25749ca746b2a64c6bb5e8 100644 --- a/drivers/acpi/tables.c +++ b/drivers/acpi/tables.c @@ -251,8 +251,9 @@ int __init_or_acpilib acpi_table_parse_entries_array( return -ENODEV; } - count = acpi_parse_entries_array(id, table_size, table_header, - proc, proc_num, max_entries); + count = acpi_parse_entries_array(id, table_size, + (union fw_table_header *)table_header, + proc, proc_num, max_entries); acpi_put_table(table_header); return count; diff --git a/drivers/base/node.c b/drivers/base/node.c index 433897eecbdcc9fcbbb80d5e5f35bd8683d26d03..1c05640461dd1679755c3811b4677e152bb8d875 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -74,14 +74,14 @@ static BIN_ATTR_RO(cpulist, CPULIST_FILE_MAX_BYTES); * @dev: Device for this memory access class * @list_node: List element in the node's access list * @access: The access class rank - * @hmem_attrs: Heterogeneous memory performance attributes + * @coord: Heterogeneous memory performance coordinates */ struct node_access_nodes { struct device dev; struct list_head list_node; unsigned int access; #ifdef CONFIG_HMEM_REPORTING - struct node_hmem_attrs hmem_attrs; + struct access_coordinate coord; #endif }; #define to_access_nodes(dev) container_of(dev, struct node_access_nodes, dev) @@ -167,7 +167,7 @@ static ssize_t property##_show(struct device *dev, \ char *buf) \ { \ return sysfs_emit(buf, "%u\n", \ - to_access_nodes(dev)->hmem_attrs.property); \ + 
to_access_nodes(dev)->coord.property); \ } \ static DEVICE_ATTR_RO(property) @@ -187,10 +187,10 @@ static struct attribute *access_attrs[] = { /** * node_set_perf_attrs - Set the performance values for given access class * @nid: Node identifier to be set - * @hmem_attrs: Heterogeneous memory performance attributes + * @coord: Heterogeneous memory performance coordinates * @access: The access class the for the given attributes */ -void node_set_perf_attrs(unsigned int nid, struct node_hmem_attrs *hmem_attrs, +void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord, unsigned int access) { struct node_access_nodes *c; @@ -205,7 +205,7 @@ void node_set_perf_attrs(unsigned int nid, struct node_hmem_attrs *hmem_attrs, if (!c) return; - c->hmem_attrs = *hmem_attrs; + c->coord = *coord; for (i = 0; access_attrs[i] != NULL; i++) { if (sysfs_add_file_to_group(&c->dev.kobj, access_attrs[i], "initiators")) { diff --git a/drivers/base/power/trace.c b/drivers/base/power/trace.c index 72b7a92337b1889a0a5868d47489d26a718290b5..cd6e559648b21bd3661caed21573238f61038401 100644 --- a/drivers/base/power/trace.c +++ b/drivers/base/power/trace.c @@ -120,7 +120,7 @@ static unsigned int read_magic_time(void) struct rtc_time time; unsigned int val; - if (mc146818_get_time(&time) < 0) { + if (mc146818_get_time(&time, 1000) < 0) { pr_err("Unable to read current time from RTC\n"); return 0; } diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 146b32fa7b47ade338d0379e021aa224412ec657..f8145499da38c834225b8f2d2ee0448d19adc8e1 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -165,39 +165,37 @@ static loff_t get_loop_size(struct loop_device *lo, struct file *file) return get_size(lo->lo_offset, lo->lo_sizelimit, file); } +/* + * We support direct I/O only if lo_offset is aligned with the logical I/O size + * of backing device, and the logical block size of loop is bigger than that of + * the backing device. 
+ */ +static bool lo_bdev_can_use_dio(struct loop_device *lo, + struct block_device *backing_bdev) +{ + unsigned short sb_bsize = bdev_logical_block_size(backing_bdev); + + if (queue_logical_block_size(lo->lo_queue) < sb_bsize) + return false; + if (lo->lo_offset & (sb_bsize - 1)) + return false; + return true; +} + static void __loop_update_dio(struct loop_device *lo, bool dio) { struct file *file = lo->lo_backing_file; - struct address_space *mapping = file->f_mapping; - struct inode *inode = mapping->host; - unsigned short sb_bsize = 0; - unsigned dio_align = 0; + struct inode *inode = file->f_mapping->host; + struct block_device *backing_bdev = NULL; bool use_dio; - if (inode->i_sb->s_bdev) { - sb_bsize = bdev_logical_block_size(inode->i_sb->s_bdev); - dio_align = sb_bsize - 1; - } + if (S_ISBLK(inode->i_mode)) + backing_bdev = I_BDEV(inode); + else if (inode->i_sb->s_bdev) + backing_bdev = inode->i_sb->s_bdev; - /* - * We support direct I/O only if lo_offset is aligned with the - * logical I/O size of backing device, and the logical block - * size of loop is bigger than the backing device's. 
- * - * TODO: the above condition may be loosed in the future, and - * direct I/O may be switched runtime at that time because most - * of requests in sane applications should be PAGE_SIZE aligned - */ - if (dio) { - if (queue_logical_block_size(lo->lo_queue) >= sb_bsize && - !(lo->lo_offset & dio_align) && - (file->f_mode & FMODE_CAN_ODIRECT)) - use_dio = true; - else - use_dio = false; - } else { - use_dio = false; - } + use_dio = dio && (file->f_mode & FMODE_CAN_ODIRECT) && + (!backing_bdev || lo_bdev_can_use_dio(lo, backing_bdev)); if (lo->use_dio == use_dio) return; diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 4e72ec4e25ac5a0f41bca299e7efaecf6503c451..33a8f37bb6a1f504060f783c6d727e4c76026a2e 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -508,7 +508,7 @@ static int __sock_xmit(struct nbd_device *nbd, struct socket *sock, int send, struct iov_iter *iter, int msg_flags, int *sent) { int result; - struct msghdr msg; + struct msghdr msg = {} ; unsigned int noreclaim_flag; if (unlikely(!sock)) { @@ -524,10 +524,6 @@ static int __sock_xmit(struct nbd_device *nbd, struct socket *sock, int send, do { sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC; sock->sk->sk_use_task_frag = false; - msg.msg_name = NULL; - msg.msg_namelen = 0; - msg.msg_control = NULL; - msg.msg_controllen = 0; msg.msg_flags = msg_flags | MSG_NOSIGNAL; if (send) diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 9f7695f00c2db8494a0230f2337a64d2fb4d3a14..36755f263e8ec03b1828bf44a05cc3b54bb6a03f 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -1840,7 +1840,7 @@ static void null_del_dev(struct nullb *nullb) dev = nullb->dev; - ida_simple_remove(&nullb_indexes, nullb->index); + ida_free(&nullb_indexes, nullb->index); list_del_init(&nullb->list); @@ -2174,7 +2174,7 @@ static int null_add_dev(struct nullb_device *dev) blk_queue_flag_set(QUEUE_FLAG_NONROT, nullb->q); mutex_lock(&lock); - rv = 
ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL); + rv = ida_alloc(&nullb_indexes, GFP_KERNEL); if (rv < 0) { mutex_unlock(&lock); goto out_cleanup_zone; diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index a999b698b131f7763916c3bd0de5c87478fd0df4..12b5d53ec85645fb22395d41adef81d13cdb7292 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -3452,14 +3452,15 @@ static bool rbd_lock_add_request(struct rbd_img_request *img_req) static void rbd_lock_del_request(struct rbd_img_request *img_req) { struct rbd_device *rbd_dev = img_req->rbd_dev; - bool need_wakeup; + bool need_wakeup = false; lockdep_assert_held(&rbd_dev->lock_rwsem); spin_lock(&rbd_dev->lock_lists_lock); - rbd_assert(!list_empty(&img_req->lock_item)); - list_del_init(&img_req->lock_item); - need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_RELEASING && - list_empty(&rbd_dev->running_list)); + if (!list_empty(&img_req->lock_item)) { + list_del_init(&img_req->lock_item); + need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_RELEASING && + list_empty(&rbd_dev->running_list)); + } spin_unlock(&rbd_dev->lock_lists_lock); if (need_wakeup) complete(&rbd_dev->releasing_wait); @@ -3842,14 +3843,19 @@ static void wake_lock_waiters(struct rbd_device *rbd_dev, int result) return; } - list_for_each_entry(img_req, &rbd_dev->acquiring_list, lock_item) { + while (!list_empty(&rbd_dev->acquiring_list)) { + img_req = list_first_entry(&rbd_dev->acquiring_list, + struct rbd_img_request, lock_item); mutex_lock(&img_req->state_mutex); rbd_assert(img_req->state == RBD_IMG_EXCLUSIVE_LOCK); + if (!result) + list_move_tail(&img_req->lock_item, + &rbd_dev->running_list); + else + list_del_init(&img_req->lock_item); rbd_img_schedule(img_req, result); mutex_unlock(&img_req->state_mutex); } - - list_splice_tail_init(&rbd_dev->acquiring_list, &rbd_dev->running_list); } static bool locker_equal(const struct ceph_locker *lhs, @@ -5326,7 +5332,7 @@ static void rbd_dev_release(struct device *dev) if (need_put) { 
destroy_workqueue(rbd_dev->task_wq); - ida_simple_remove(&rbd_dev_id_ida, rbd_dev->dev_id); + ida_free(&rbd_dev_id_ida, rbd_dev->dev_id); } rbd_dev_free(rbd_dev); @@ -5402,9 +5408,9 @@ static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, return NULL; /* get an id and fill in device name */ - rbd_dev->dev_id = ida_simple_get(&rbd_dev_id_ida, 0, - minor_to_rbd_dev_id(1 << MINORBITS), - GFP_KERNEL); + rbd_dev->dev_id = ida_alloc_max(&rbd_dev_id_ida, + minor_to_rbd_dev_id(1 << MINORBITS) - 1, + GFP_KERNEL); if (rbd_dev->dev_id < 0) goto fail_rbd_dev; @@ -5425,7 +5431,7 @@ static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, return rbd_dev; fail_dev_id: - ida_simple_remove(&rbd_dev_id_ida, rbd_dev->dev_id); + ida_free(&rbd_dev_id_ida, rbd_dev->dev_id); fail_rbd_dev: rbd_dev_free(rbd_dev); return NULL; diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 3b6b9abb8ce1f4d90f66b5ff94c1499b98208fd8..5bf98fd6a651a506ff294545d6241f608af34568 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -367,8 +367,6 @@ static void virtblk_done(struct virtqueue *vq) blk_mq_complete_request(req); req_done = true; } - if (unlikely(virtqueue_is_broken(vq))) - break; } while (!virtqueue_enable_cb(vq)); /* In case queue is stopped waiting for more buffers. 
*/ diff --git a/drivers/clk/qcom/gcc-x1e80100.c b/drivers/clk/qcom/gcc-x1e80100.c index 74db7fef237b4d3f11e758d6be2bf0744056aa86..d7182d6e978372ce467da5d0c7b4fd0613eb0d8b 100644 --- a/drivers/clk/qcom/gcc-x1e80100.c +++ b/drivers/clk/qcom/gcc-x1e80100.c @@ -4,8 +4,9 @@ */ #include +#include #include -#include +#include #include #include diff --git a/drivers/clocksource/timer-cadence-ttc.c b/drivers/clocksource/timer-cadence-ttc.c index 32daaac9b13208fc92604c89b625ec914ef3e357..ca7a06489c405f1d013e0fedd997413c43a19d65 100644 --- a/drivers/clocksource/timer-cadence-ttc.c +++ b/drivers/clocksource/timer-cadence-ttc.c @@ -69,7 +69,7 @@ * @base_addr: Base address of timer * @freq: Timer input clock frequency * @clk: Associated clock source - * @clk_rate_change_nb Notifier block for clock rate changes + * @clk_rate_change_nb: Notifier block for clock rate changes */ struct ttc_timer { void __iomem *base_addr; @@ -134,7 +134,7 @@ static void ttc_set_interval(struct ttc_timer *timer, * @irq: IRQ number of the Timer * @dev_id: void pointer to the ttc_timer instance * - * returns: Always IRQ_HANDLED - success + * Returns: Always IRQ_HANDLED - success **/ static irqreturn_t ttc_clock_event_interrupt(int irq, void *dev_id) { @@ -151,8 +151,9 @@ static irqreturn_t ttc_clock_event_interrupt(int irq, void *dev_id) /** * __ttc_clocksource_read - Reads the timer counter register + * @cs: &clocksource to read from * - * returns: Current timer counter register value + * Returns: Current timer counter register value **/ static u64 __ttc_clocksource_read(struct clocksource *cs) { @@ -173,7 +174,7 @@ static u64 notrace ttc_sched_clock_read(void) * @cycles: Timer interval ticks * @evt: Address of clock event instance * - * returns: Always 0 - success + * Returns: Always %0 - success **/ static int ttc_set_next_event(unsigned long cycles, struct clock_event_device *evt) @@ -186,9 +187,12 @@ static int ttc_set_next_event(unsigned long cycles, } /** - * ttc_set_{shutdown|oneshot|periodic} - 
Sets the state of timer - * + * ttc_shutdown - Sets the state of timer * @evt: Address of clock event instance + * + * Used for shutdown or oneshot. + * + * Returns: Always %0 - success **/ static int ttc_shutdown(struct clock_event_device *evt) { @@ -202,6 +206,12 @@ static int ttc_shutdown(struct clock_event_device *evt) return 0; } +/** + * ttc_set_periodic - Sets the state of timer + * @evt: Address of clock event instance + * + * Returns: Always %0 - success + */ static int ttc_set_periodic(struct clock_event_device *evt) { struct ttc_timer_clockevent *ttce = to_ttc_timer_clkevent(evt); diff --git a/drivers/clocksource/timer-ep93xx.c b/drivers/clocksource/timer-ep93xx.c index bc0ca6e12334903dd8ac17364e96469a42bbf5ab..6981ff3ac8a940be37b8dc247e59e32c2604f992 100644 --- a/drivers/clocksource/timer-ep93xx.c +++ b/drivers/clocksource/timer-ep93xx.c @@ -155,9 +155,8 @@ static int __init ep93xx_timer_of_init(struct device_node *np) ep93xx_tcu = tcu; irq = irq_of_parse_and_map(np, 0); - if (irq == 0) - irq = -EINVAL; - if (irq < 0) { + if (!irq) { + ret = -EINVAL; pr_err("EP93XX Timer Can't parse IRQ %d", irq); goto out_free; } diff --git a/drivers/clocksource/timer-riscv.c b/drivers/clocksource/timer-riscv.c index 57857c0dfba97e0bfdcd5190e8aee31e11028667..e66dcbd6656658dd32f913189fceebda87e8ccbf 100644 --- a/drivers/clocksource/timer-riscv.c +++ b/drivers/clocksource/timer-riscv.c @@ -61,12 +61,19 @@ static int riscv_clock_next_event(unsigned long delta, return 0; } +static int riscv_clock_shutdown(struct clock_event_device *evt) +{ + riscv_clock_event_stop(); + return 0; +} + static unsigned int riscv_clock_event_irq; static DEFINE_PER_CPU(struct clock_event_device, riscv_clock_event) = { .name = "riscv_timer_clockevent", .features = CLOCK_EVT_FEAT_ONESHOT, .rating = 100, .set_next_event = riscv_clock_next_event, + .set_state_shutdown = riscv_clock_shutdown, }; /* diff --git a/drivers/clocksource/timer-ti-dm.c b/drivers/clocksource/timer-ti-dm.c index 
5f60f6bd33866b4edc3ee2e248acb6d510f468e0..56acf26172621ffb96a14af47ccf92d31af15501 100644 --- a/drivers/clocksource/timer-ti-dm.c +++ b/drivers/clocksource/timer-ti-dm.c @@ -183,7 +183,7 @@ static inline u32 dmtimer_read(struct dmtimer *timer, u32 reg) * dmtimer_write - write timer registers in posted and non-posted mode * @timer: timer pointer over which write operation is to perform * @reg: lowest byte holds the register offset - * @value: data to write into the register + * @val: data to write into the register * * The posted mode bit is encoded in reg. Note that in posted mode, the write * pending bit must be checked. Otherwise a write on a register which has a @@ -949,7 +949,7 @@ static int omap_dm_timer_set_int_enable(struct omap_dm_timer *cookie, /** * omap_dm_timer_set_int_disable - disable timer interrupts - * @timer: pointer to timer handle + * @cookie: pointer to timer cookie * @mask: bit mask of interrupts to be disabled * * Disables the specified timer interrupts for a timer. diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig index 8ea1d340e4385089b3b449c07a0d6e1e679ab60a..67998dbd1d46b49dc623a0929c262174965bb601 100644 --- a/drivers/cxl/Kconfig +++ b/drivers/cxl/Kconfig @@ -5,6 +5,7 @@ menuconfig CXL_BUS select FW_LOADER select FW_UPLOAD select PCI_DOE + select FIRMWARE_TABLE help CXL is a bus that is electrically compatible with PCI Express, but layers three protocols on that signalling (CXL.io, CXL.cache, and @@ -54,8 +55,10 @@ config CXL_MEM_RAW_COMMANDS config CXL_ACPI tristate "CXL ACPI: Platform Support" depends on ACPI + depends on ACPI_NUMA default CXL_BUS select ACPI_TABLE_LIB + select ACPI_HMAT help Enable support for host managed device memory (HDM) resources published by a platform's ACPI CXL memory layout description. 
See diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index 2034eb4ce83fb7531be4148e4db0679a39b4587b..dcf2b39e1048822ca90324667d85f68225c05fa4 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include "cxlpci.h" #include "cxl.h" @@ -17,6 +18,10 @@ struct cxl_cxims_data { u64 xormaps[] __counted_by(nr_maps); }; +static const guid_t acpi_cxl_qtg_id_guid = + GUID_INIT(0xF365F9A6, 0xA7DE, 0x4071, + 0xA6, 0x6A, 0xB4, 0x0C, 0x0B, 0x4F, 0x8E, 0x52); + /* * Find a targets entry (n) in the host bridge interleave list. * CXL Specification 3.0 Table 9-22 @@ -194,6 +199,123 @@ struct cxl_cfmws_context { int id; }; +/** + * cxl_acpi_evaluate_qtg_dsm - Retrieve QTG ids via ACPI _DSM + * @handle: ACPI handle + * @coord: performance access coordinates + * @entries: number of QTG IDs to return + * @qos_class: int array provided by caller to return QTG IDs + * + * Return: number of QTG IDs returned, or -errno for errors + * + * Issue QTG _DSM with accompanied bandwidth and latency data in order to get + * the QTG IDs that are suitable for the performance point in order of most + * suitable to least suitable. Write back array of QTG IDs and return the + * actual number of QTG IDs written back. 
+ */ +static int +cxl_acpi_evaluate_qtg_dsm(acpi_handle handle, struct access_coordinate *coord, + int entries, int *qos_class) +{ + union acpi_object *out_obj, *out_buf, *obj; + union acpi_object in_array[4] = { + [0].integer = { ACPI_TYPE_INTEGER, coord->read_latency }, + [1].integer = { ACPI_TYPE_INTEGER, coord->write_latency }, + [2].integer = { ACPI_TYPE_INTEGER, coord->read_bandwidth }, + [3].integer = { ACPI_TYPE_INTEGER, coord->write_bandwidth }, + }; + union acpi_object in_obj = { + .package = { + .type = ACPI_TYPE_PACKAGE, + .count = 4, + .elements = in_array, + }, + }; + int count, pkg_entries, i; + u16 max_qtg; + int rc; + + if (!entries) + return -EINVAL; + + out_obj = acpi_evaluate_dsm(handle, &acpi_cxl_qtg_id_guid, 1, 1, &in_obj); + if (!out_obj) + return -ENXIO; + + if (out_obj->type != ACPI_TYPE_PACKAGE) { + rc = -ENXIO; + goto out; + } + + /* Check Max QTG ID */ + obj = &out_obj->package.elements[0]; + if (obj->type != ACPI_TYPE_INTEGER) { + rc = -ENXIO; + goto out; + } + + max_qtg = obj->integer.value; + + /* It's legal to have 0 QTG entries */ + pkg_entries = out_obj->package.count; + if (pkg_entries <= 1) { + rc = 0; + goto out; + } + + /* Retrieve QTG IDs package */ + obj = &out_obj->package.elements[1]; + if (obj->type != ACPI_TYPE_PACKAGE) { + rc = -ENXIO; + goto out; + } + + pkg_entries = obj->package.count; + count = min(entries, pkg_entries); + for (i = 0; i < count; i++) { + u16 qtg_id; + + out_buf = &obj->package.elements[i]; + if (out_buf->type != ACPI_TYPE_INTEGER) { + rc = -ENXIO; + goto out; + } + + qtg_id = out_buf->integer.value; + if (qtg_id > max_qtg) + pr_warn("QTG ID %u greater than MAX %u\n", + qtg_id, max_qtg); + + qos_class[i] = qtg_id; + } + rc = count; + +out: + ACPI_FREE(out_obj); + return rc; +} + +static int cxl_acpi_qos_class(struct cxl_root *cxl_root, + struct access_coordinate *coord, int entries, + int *qos_class) +{ + struct device *dev = cxl_root->port.uport_dev; + acpi_handle handle; + + if 
(!dev_is_platform(dev)) + return -ENODEV; + + handle = ACPI_HANDLE(dev); + if (!handle) + return -ENODEV; + + return cxl_acpi_evaluate_qtg_dsm(handle, coord, entries, qos_class); +} + +static const struct cxl_root_ops acpi_root_ops = { + .qos_class = cxl_acpi_qos_class, +}; + static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, const unsigned long end) { @@ -389,8 +511,29 @@ static int cxl_get_chbs(struct device *dev, struct acpi_device *hb, return 0; } +static int get_genport_coordinates(struct device *dev, struct cxl_dport *dport) +{ + struct acpi_device *hb = to_cxl_host_bridge(NULL, dev); + u32 uid; + int rc; + + if (kstrtou32(acpi_device_uid(hb), 0, &uid)) + return -EINVAL; + + rc = acpi_get_genport_coordinates(uid, &dport->hb_coord); + if (rc < 0) + return rc; + + /* Adjust back to picoseconds from nanoseconds */ + dport->hb_coord.read_latency *= 1000; + dport->hb_coord.write_latency *= 1000; + + return 0; +} + static int add_host_bridge_dport(struct device *match, void *arg) { + int ret; acpi_status rc; struct device *bridge; struct cxl_dport *dport; @@ -440,6 +583,10 @@ static int add_host_bridge_dport(struct device *match, void *arg) if (IS_ERR(dport)) return PTR_ERR(dport); + ret = get_genport_coordinates(match, dport); + if (ret) + dev_dbg(match, "Failed to get generic port perf coordinates.\n"); + return 0; } @@ -656,6 +803,7 @@ static int cxl_acpi_probe(struct platform_device *pdev) { int rc; struct resource *cxl_res; + struct cxl_root *cxl_root; struct cxl_port *root_port; struct device *host = &pdev->dev; struct acpi_device *adev = ACPI_COMPANION(host); @@ -675,9 +823,10 @@ static int cxl_acpi_probe(struct platform_device *pdev) cxl_res->end = -1; cxl_res->flags = IORESOURCE_MEM; - root_port = devm_cxl_add_port(host, host, CXL_RESOURCE_NONE, NULL); - if (IS_ERR(root_port)) - return PTR_ERR(root_port); + cxl_root = devm_cxl_add_root(host, &acpi_root_ops); + if (IS_ERR(cxl_root)) + return PTR_ERR(cxl_root); + root_port = 
&cxl_root->port; rc = bus_for_each_dev(adev->dev.bus, NULL, root_port, add_host_bridge_dport); diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile index 1f66b5d4d93556868a5d413b520882f3d94b2f6f..9259bcc6773c804ccace2478c9f6f09267b48c9d 100644 --- a/drivers/cxl/core/Makefile +++ b/drivers/cxl/core/Makefile @@ -13,5 +13,6 @@ cxl_core-y += mbox.o cxl_core-y += pci.o cxl_core-y += hdm.o cxl_core-y += pmu.o +cxl_core-y += cdat.o cxl_core-$(CONFIG_TRACING) += trace.o cxl_core-$(CONFIG_CXL_REGION) += region.o diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c new file mode 100644 index 0000000000000000000000000000000000000000..6fe11546889fabb48e997fda83e1f184a64179c6 --- /dev/null +++ b/drivers/cxl/core/cdat.c @@ -0,0 +1,521 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2023 Intel Corporation. All rights reserved. */ +#include +#include +#include +#include +#include +#include "cxlpci.h" +#include "cxlmem.h" +#include "core.h" +#include "cxl.h" + +struct dsmas_entry { + struct range dpa_range; + u8 handle; + struct access_coordinate coord; + + int entries; + int qos_class; +}; + +static int cdat_dsmas_handler(union acpi_subtable_headers *header, void *arg, + const unsigned long end) +{ + struct acpi_cdat_header *hdr = &header->cdat; + struct acpi_cdat_dsmas *dsmas; + int size = sizeof(*hdr) + sizeof(*dsmas); + struct xarray *dsmas_xa = arg; + struct dsmas_entry *dent; + u16 len; + int rc; + + len = le16_to_cpu((__force __le16)hdr->length); + if (len != size || (unsigned long)hdr + len > end) { + pr_warn("Malformed DSMAS table length: (%u:%u)\n", size, len); + return -EINVAL; + } + + /* Skip common header */ + dsmas = (struct acpi_cdat_dsmas *)(hdr + 1); + + dent = kzalloc(sizeof(*dent), GFP_KERNEL); + if (!dent) + return -ENOMEM; + + dent->handle = dsmas->dsmad_handle; + dent->dpa_range.start = le64_to_cpu((__force __le64)dsmas->dpa_base_address); + dent->dpa_range.end = le64_to_cpu((__force __le64)dsmas->dpa_base_address) + + 
le64_to_cpu((__force __le64)dsmas->dpa_length) - 1; + + rc = xa_insert(dsmas_xa, dent->handle, dent, GFP_KERNEL); + if (rc) { + kfree(dent); + return rc; + } + + return 0; +} + +static void cxl_access_coordinate_set(struct access_coordinate *coord, + int access, unsigned int val) +{ + switch (access) { + case ACPI_HMAT_ACCESS_LATENCY: + coord->read_latency = val; + coord->write_latency = val; + break; + case ACPI_HMAT_READ_LATENCY: + coord->read_latency = val; + break; + case ACPI_HMAT_WRITE_LATENCY: + coord->write_latency = val; + break; + case ACPI_HMAT_ACCESS_BANDWIDTH: + coord->read_bandwidth = val; + coord->write_bandwidth = val; + break; + case ACPI_HMAT_READ_BANDWIDTH: + coord->read_bandwidth = val; + break; + case ACPI_HMAT_WRITE_BANDWIDTH: + coord->write_bandwidth = val; + break; + } +} + +static int cdat_dslbis_handler(union acpi_subtable_headers *header, void *arg, + const unsigned long end) +{ + struct acpi_cdat_header *hdr = &header->cdat; + struct acpi_cdat_dslbis *dslbis; + int size = sizeof(*hdr) + sizeof(*dslbis); + struct xarray *dsmas_xa = arg; + struct dsmas_entry *dent; + __le64 le_base; + __le16 le_val; + u64 val; + u16 len; + int rc; + + len = le16_to_cpu((__force __le16)hdr->length); + if (len != size || (unsigned long)hdr + len > end) { + pr_warn("Malformed DSLBIS table length: (%u:%u)\n", size, len); + return -EINVAL; + } + + /* Skip common header */ + dslbis = (struct acpi_cdat_dslbis *)(hdr + 1); + + /* Skip unrecognized data type */ + if (dslbis->data_type > ACPI_HMAT_WRITE_BANDWIDTH) + return 0; + + /* Not a memory type, skip */ + if ((dslbis->flags & ACPI_HMAT_MEMORY_HIERARCHY) != ACPI_HMAT_MEMORY) + return 0; + + dent = xa_load(dsmas_xa, dslbis->handle); + if (!dent) { + pr_warn("No matching DSMAS entry for DSLBIS entry.\n"); + return 0; + } + + le_base = (__force __le64)dslbis->entry_base_unit; + le_val = (__force __le16)dslbis->entry[0]; + rc = check_mul_overflow(le64_to_cpu(le_base), + le16_to_cpu(le_val), &val); + if (rc) + 
pr_warn("DSLBIS value overflowed.\n"); + + cxl_access_coordinate_set(&dent->coord, dslbis->data_type, val); + + return 0; +} + +static int cdat_table_parse_output(int rc) +{ + if (rc < 0) + return rc; + if (rc == 0) + return -ENOENT; + + return 0; +} + +static int cxl_cdat_endpoint_process(struct cxl_port *port, + struct xarray *dsmas_xa) +{ + int rc; + + rc = cdat_table_parse(ACPI_CDAT_TYPE_DSMAS, cdat_dsmas_handler, + dsmas_xa, port->cdat.table); + rc = cdat_table_parse_output(rc); + if (rc) + return rc; + + rc = cdat_table_parse(ACPI_CDAT_TYPE_DSLBIS, cdat_dslbis_handler, + dsmas_xa, port->cdat.table); + return cdat_table_parse_output(rc); +} + +static int cxl_port_perf_data_calculate(struct cxl_port *port, + struct xarray *dsmas_xa) +{ + struct access_coordinate c; + struct dsmas_entry *dent; + int valid_entries = 0; + unsigned long index; + int rc; + + rc = cxl_endpoint_get_perf_coordinates(port, &c); + if (rc) { + dev_dbg(&port->dev, "Failed to retrieve perf coordinates.\n"); + return rc; + } + + struct cxl_root *cxl_root __free(put_cxl_root) = find_cxl_root(port); + + if (!cxl_root) + return -ENODEV; + + if (!cxl_root->ops || !cxl_root->ops->qos_class) + return -EOPNOTSUPP; + + xa_for_each(dsmas_xa, index, dent) { + int qos_class; + + dent->coord.read_latency = dent->coord.read_latency + + c.read_latency; + dent->coord.write_latency = dent->coord.write_latency + + c.write_latency; + dent->coord.read_bandwidth = min_t(int, c.read_bandwidth, + dent->coord.read_bandwidth); + dent->coord.write_bandwidth = min_t(int, c.write_bandwidth, + dent->coord.write_bandwidth); + + dent->entries = 1; + rc = cxl_root->ops->qos_class(cxl_root, &dent->coord, 1, + &qos_class); + if (rc != 1) + continue; + + valid_entries++; + dent->qos_class = qos_class; + } + + if (!valid_entries) + return -ENOENT; + + return 0; +} + +static void add_perf_entry(struct device *dev, struct dsmas_entry *dent, + struct list_head *list) +{ + struct cxl_dpa_perf *dpa_perf; + + dpa_perf = 
kzalloc(sizeof(*dpa_perf), GFP_KERNEL); + if (!dpa_perf) + return; + + dpa_perf->dpa_range = dent->dpa_range; + dpa_perf->coord = dent->coord; + dpa_perf->qos_class = dent->qos_class; + list_add_tail(&dpa_perf->list, list); + dev_dbg(dev, + "DSMAS: dpa: %#llx qos: %d read_bw: %d write_bw %d read_lat: %d write_lat: %d\n", + dent->dpa_range.start, dpa_perf->qos_class, + dent->coord.read_bandwidth, dent->coord.write_bandwidth, + dent->coord.read_latency, dent->coord.write_latency); +} + +static void free_perf_ents(void *data) +{ + struct cxl_memdev_state *mds = data; + struct cxl_dpa_perf *dpa_perf, *n; + LIST_HEAD(discard); + + list_splice_tail_init(&mds->ram_perf_list, &discard); + list_splice_tail_init(&mds->pmem_perf_list, &discard); + list_for_each_entry_safe(dpa_perf, n, &discard, list) { + list_del(&dpa_perf->list); + kfree(dpa_perf); + } +} + +static void cxl_memdev_set_qos_class(struct cxl_dev_state *cxlds, + struct xarray *dsmas_xa) +{ + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); + struct device *dev = cxlds->dev; + struct range pmem_range = { + .start = cxlds->pmem_res.start, + .end = cxlds->pmem_res.end, + }; + struct range ram_range = { + .start = cxlds->ram_res.start, + .end = cxlds->ram_res.end, + }; + struct dsmas_entry *dent; + unsigned long index; + + xa_for_each(dsmas_xa, index, dent) { + if (resource_size(&cxlds->ram_res) && + range_contains(&ram_range, &dent->dpa_range)) + add_perf_entry(dev, dent, &mds->ram_perf_list); + else if (resource_size(&cxlds->pmem_res) && + range_contains(&pmem_range, &dent->dpa_range)) + add_perf_entry(dev, dent, &mds->pmem_perf_list); + else + dev_dbg(dev, "no partition for dsmas dpa: %#llx\n", + dent->dpa_range.start); + } + + devm_add_action_or_reset(&cxlds->cxlmd->dev, free_perf_ents, mds); +} + +static int match_cxlrd_qos_class(struct device *dev, void *data) +{ + int dev_qos_class = *(int *)data; + struct cxl_root_decoder *cxlrd; + + if (!is_root_decoder(dev)) + return 0; + + cxlrd = 
to_cxl_root_decoder(dev); + if (cxlrd->qos_class == CXL_QOS_CLASS_INVALID) + return 0; + + if (cxlrd->qos_class == dev_qos_class) + return 1; + + return 0; +} + +static void cxl_qos_match(struct cxl_port *root_port, + struct list_head *work_list, + struct list_head *discard_list) +{ + struct cxl_dpa_perf *dpa_perf, *n; + + list_for_each_entry_safe(dpa_perf, n, work_list, list) { + int rc; + + if (dpa_perf->qos_class == CXL_QOS_CLASS_INVALID) + return; + + rc = device_for_each_child(&root_port->dev, + (void *)&dpa_perf->qos_class, + match_cxlrd_qos_class); + if (!rc) + list_move_tail(&dpa_perf->list, discard_list); + } +} + +static int match_cxlrd_hb(struct device *dev, void *data) +{ + struct device *host_bridge = data; + struct cxl_switch_decoder *cxlsd; + struct cxl_root_decoder *cxlrd; + + if (!is_root_decoder(dev)) + return 0; + + cxlrd = to_cxl_root_decoder(dev); + cxlsd = &cxlrd->cxlsd; + + guard(rwsem_read)(&cxl_region_rwsem); + for (int i = 0; i < cxlsd->nr_targets; i++) { + if (host_bridge == cxlsd->target[i]->dport_dev) + return 1; + } + + return 0; +} + +static void discard_dpa_perf(struct list_head *list) +{ + struct cxl_dpa_perf *dpa_perf, *n; + + list_for_each_entry_safe(dpa_perf, n, list, list) { + list_del(&dpa_perf->list); + kfree(dpa_perf); + } +} +DEFINE_FREE(dpa_perf, struct list_head *, if (!list_empty(_T)) discard_dpa_perf(_T)) + +static int cxl_qos_class_verify(struct cxl_memdev *cxlmd) +{ + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); + LIST_HEAD(__discard); + struct list_head *discard __free(dpa_perf) = &__discard; + struct cxl_port *root_port; + int rc; + + struct cxl_root *cxl_root __free(put_cxl_root) = + find_cxl_root(cxlmd->endpoint); + + if (!cxl_root) + return -ENODEV; + + root_port = &cxl_root->port; + + /* Check that the QTG IDs are all sane between end device and root decoders */ + cxl_qos_match(root_port, &mds->ram_perf_list, discard); + cxl_qos_match(root_port, 
&mds->pmem_perf_list, discard); + + /* Check to make sure that the device's host bridge is under a root decoder */ + rc = device_for_each_child(&root_port->dev, + (void *)cxlmd->endpoint->host_bridge, + match_cxlrd_hb); + if (!rc) { + list_splice_tail_init(&mds->ram_perf_list, discard); + list_splice_tail_init(&mds->pmem_perf_list, discard); + } + + return rc; +} + +static void discard_dsmas(struct xarray *xa) +{ + unsigned long index; + void *ent; + + xa_for_each(xa, index, ent) { + xa_erase(xa, index); + kfree(ent); + } + xa_destroy(xa); +} +DEFINE_FREE(dsmas, struct xarray *, if (_T) discard_dsmas(_T)) + +void cxl_endpoint_parse_cdat(struct cxl_port *port) +{ + struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev); + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct xarray __dsmas_xa; + struct xarray *dsmas_xa __free(dsmas) = &__dsmas_xa; + int rc; + + xa_init(&__dsmas_xa); + if (!port->cdat.table) + return; + + rc = cxl_cdat_endpoint_process(port, dsmas_xa); + if (rc < 0) { + dev_dbg(&port->dev, "Failed to parse CDAT: %d\n", rc); + return; + } + + rc = cxl_port_perf_data_calculate(port, dsmas_xa); + if (rc) { + dev_dbg(&port->dev, "Failed to do perf coord calculations.\n"); + return; + } + + cxl_memdev_set_qos_class(cxlds, dsmas_xa); + cxl_qos_class_verify(cxlmd); +} +EXPORT_SYMBOL_NS_GPL(cxl_endpoint_parse_cdat, CXL); + +static int cdat_sslbis_handler(union acpi_subtable_headers *header, void *arg, + const unsigned long end) +{ + struct acpi_cdat_sslbis *sslbis; + int size = sizeof(header->cdat) + sizeof(*sslbis); + struct cxl_port *port = arg; + struct device *dev = &port->dev; + struct acpi_cdat_sslbe *entry; + int remain, entries, i; + u16 len; + + len = le16_to_cpu((__force __le16)header->cdat.length); + remain = len - size; + if (!remain || remain % sizeof(*entry) || + (unsigned long)header + len > end) { + dev_warn(dev, "Malformed SSLBIS table length: (%u)\n", len); + return -EINVAL; + } + + /* Skip common header */ + sslbis = (struct 
acpi_cdat_sslbis *)((unsigned long)header + + sizeof(header->cdat)); + + /* Unrecognized data type, we can skip */ + if (sslbis->data_type > ACPI_HMAT_WRITE_BANDWIDTH) + return 0; + + entries = remain / sizeof(*entry); + entry = (struct acpi_cdat_sslbe *)((unsigned long)header + sizeof(*sslbis)); + + for (i = 0; i < entries; i++) { + u16 x = le16_to_cpu((__force __le16)entry->portx_id); + u16 y = le16_to_cpu((__force __le16)entry->porty_id); + __le64 le_base; + __le16 le_val; + struct cxl_dport *dport; + unsigned long index; + u16 dsp_id; + u64 val; + + switch (x) { + case ACPI_CDAT_SSLBIS_US_PORT: + dsp_id = y; + break; + case ACPI_CDAT_SSLBIS_ANY_PORT: + switch (y) { + case ACPI_CDAT_SSLBIS_US_PORT: + dsp_id = x; + break; + case ACPI_CDAT_SSLBIS_ANY_PORT: + dsp_id = ACPI_CDAT_SSLBIS_ANY_PORT; + break; + default: + dsp_id = y; + break; + } + break; + default: + dsp_id = x; + break; + } + + le_base = (__force __le64)sslbis->entry_base_unit; + le_val = (__force __le16)entry->latency_or_bandwidth; + + if (check_mul_overflow(le64_to_cpu(le_base), + le16_to_cpu(le_val), &val)) + dev_warn(dev, "SSLBIS value overflowed!\n"); + + xa_for_each(&port->dports, index, dport) { + if (dsp_id == ACPI_CDAT_SSLBIS_ANY_PORT || + dsp_id == dport->port_id) + cxl_access_coordinate_set(&dport->sw_coord, + sslbis->data_type, + val); + } + + entry++; + } + + return 0; +} + +void cxl_switch_parse_cdat(struct cxl_port *port) +{ + int rc; + + if (!port->cdat.table) + return; + + rc = cdat_table_parse(ACPI_CDAT_TYPE_SSLBIS, cdat_sslbis_handler, + port, port->cdat.table); + rc = cdat_table_parse_output(rc); + if (rc) + dev_dbg(&port->dev, "Failed to parse SSLBIS: %d\n", rc); +} +EXPORT_SYMBOL_NS_GPL(cxl_switch_parse_cdat, CXL); + +MODULE_IMPORT_NS(CXL); diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 86d7ba23235e3bdefb567e3dc3cb656b4f9593c0..3b64fb1b9ed058055fa80220fc2b83b109cc6e17 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -88,4 +88,6 @@ enum 
cxl_poison_trace_type { CXL_POISON_TRACE_CLEAR, }; +long cxl_pci_get_latency(struct pci_dev *pdev); + #endif /* __CXL_CORE_H__ */ diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 36270dcfb42ef2f6917ea9036c8903e44584e4a2..27166a41170579a9441a2f9bf3e2a915ed85d893 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -63,6 +63,7 @@ static struct cxl_mem_command cxl_mem_commands[CXL_MEM_COMMAND_ID_MAX] = { CXL_CMD(GET_SHUTDOWN_STATE, 0, 0x1, 0), CXL_CMD(SET_SHUTDOWN_STATE, 0x1, 0, 0), CXL_CMD(GET_SCAN_MEDIA_CAPS, 0x10, 0x4, 0), + CXL_CMD(GET_TIMESTAMP, 0, 0x8, 0), }; /* @@ -836,54 +837,37 @@ out: } EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, CXL); -/* - * General Media Event Record - * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43 - */ -static const uuid_t gen_media_event_uuid = - UUID_INIT(0xfbcd0a77, 0xc260, 0x417f, - 0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6); - -/* - * DRAM Event Record - * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44 - */ -static const uuid_t dram_event_uuid = - UUID_INIT(0x601dcbb3, 0x9c06, 0x4eab, - 0xb8, 0xaf, 0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24); - -/* - * Memory Module Event Record - * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45 - */ -static const uuid_t mem_mod_event_uuid = - UUID_INIT(0xfe927475, 0xdd59, 0x4339, - 0xa5, 0x86, 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74); - -static void cxl_event_trace_record(const struct cxl_memdev *cxlmd, - enum cxl_event_log_type type, - struct cxl_event_record_raw *record) +void cxl_event_trace_record(const struct cxl_memdev *cxlmd, + enum cxl_event_log_type type, + enum cxl_event_type event_type, + const uuid_t *uuid, union cxl_event *evt) { - uuid_t *id = &record->hdr.id; - - if (uuid_equal(id, &gen_media_event_uuid)) { - struct cxl_event_gen_media *rec = - (struct cxl_event_gen_media *)record; + if (event_type == CXL_CPER_EVENT_GEN_MEDIA) + trace_cxl_general_media(cxlmd, type, &evt->gen_media); + else if (event_type == CXL_CPER_EVENT_DRAM) + trace_cxl_dram(cxlmd, type, 
&evt->dram); + else if (event_type == CXL_CPER_EVENT_MEM_MODULE) + trace_cxl_memory_module(cxlmd, type, &evt->mem_module); + else + trace_cxl_generic_event(cxlmd, type, uuid, &evt->generic); +} +EXPORT_SYMBOL_NS_GPL(cxl_event_trace_record, CXL); - trace_cxl_general_media(cxlmd, type, rec); - } else if (uuid_equal(id, &dram_event_uuid)) { - struct cxl_event_dram *rec = (struct cxl_event_dram *)record; +static void __cxl_event_trace_record(const struct cxl_memdev *cxlmd, + enum cxl_event_log_type type, + struct cxl_event_record_raw *record) +{ + enum cxl_event_type ev_type = CXL_CPER_EVENT_GENERIC; + const uuid_t *uuid = &record->id; - trace_cxl_dram(cxlmd, type, rec); - } else if (uuid_equal(id, &mem_mod_event_uuid)) { - struct cxl_event_mem_module *rec = - (struct cxl_event_mem_module *)record; + if (uuid_equal(uuid, &CXL_EVENT_GEN_MEDIA_UUID)) + ev_type = CXL_CPER_EVENT_GEN_MEDIA; + else if (uuid_equal(uuid, &CXL_EVENT_DRAM_UUID)) + ev_type = CXL_CPER_EVENT_DRAM; + else if (uuid_equal(uuid, &CXL_EVENT_MEM_MODULE_UUID)) + ev_type = CXL_CPER_EVENT_MEM_MODULE; - trace_cxl_memory_module(cxlmd, type, rec); - } else { - /* For unknown record types print just the header */ - trace_cxl_generic_event(cxlmd, type, record); - } + cxl_event_trace_record(cxlmd, type, ev_type, uuid, &record->event); } static int cxl_clear_event_record(struct cxl_memdev_state *mds, @@ -926,7 +910,10 @@ static int cxl_clear_event_record(struct cxl_memdev_state *mds, */ i = 0; for (cnt = 0; cnt < total; cnt++) { - payload->handles[i++] = get_pl->records[cnt].hdr.handle; + struct cxl_event_record_raw *raw = &get_pl->records[cnt]; + struct cxl_event_generic *gen = &raw->event.generic; + + payload->handles[i++] = gen->hdr.handle; dev_dbg(mds->cxlds.dev, "Event log '%d': Clearing %u\n", log, le16_to_cpu(payload->handles[i])); @@ -991,8 +978,8 @@ static void cxl_mem_get_records_log(struct cxl_memdev_state *mds, break; for (i = 0; i < nr_rec; i++) - cxl_event_trace_record(cxlmd, type, - 
&payload->records[i]); + __cxl_event_trace_record(cxlmd, type, + &payload->records[i]); if (payload->flags & CXL_GET_EVENT_FLAG_OVERFLOW) trace_cxl_overflow(cxlmd, type, payload); @@ -1404,6 +1391,8 @@ struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev) mds->cxlds.reg_map.host = dev; mds->cxlds.reg_map.resource = CXL_RESOURCE_NONE; mds->cxlds.type = CXL_DEVTYPE_CLASSMEM; + INIT_LIST_HEAD(&mds->ram_perf_list); + INIT_LIST_HEAD(&mds->pmem_perf_list); return mds; } diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index 2f43d368ba07308c27a2aba69a3a3330f7413325..dae8802ecdb01ee748e3891120bc0011e9e8894e 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -114,7 +114,7 @@ static DEVICE_ATTR_RO(serial); static ssize_t numa_node_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", dev_to_node(dev)); + return sysfs_emit(buf, "%d\n", dev_to_node(dev)); } static DEVICE_ATTR_RO(numa_node); diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 37e1652afbc7eac56fffbb0a5692ea2a1cd82411..6c9c8d92f8f71401af70fec26be60e0339c18c64 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright(c) 2021 Intel Corporation. All rights reserved. 
*/ +#include #include #include #include @@ -979,3 +980,38 @@ pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, return PCI_ERS_RESULT_NEED_RESET; } EXPORT_SYMBOL_NS_GPL(cxl_error_detected, CXL); + +static int cxl_flit_size(struct pci_dev *pdev) +{ + if (cxl_pci_flit_256(pdev)) + return 256; + + return 68; +} + +/** + * cxl_pci_get_latency - calculate the link latency for the PCIe link + * @pdev: PCI device + * + * return: calculated latency or 0 for no latency + * + * CXL Memory Device SW Guide v1.0 2.11.4 Link latency calculation + * Link latency = LinkPropagationLatency + FlitLatency + RetimerLatency + * LinkPropagationLatency is negligible, so 0 will be used + * RetimerLatency is assumed to be negligible and 0 will be used + * FlitLatency = FlitSize / LinkBandwidth + * FlitSize is defined by spec. CXL rev3.0 4.2.1. + * 68B flit is used up to 32GT/s. >32GT/s, 256B flit size is used. + * The FlitLatency is converted to picoseconds. + */ +long cxl_pci_get_latency(struct pci_dev *pdev) +{ + long bw; + + bw = pcie_link_speed_mbps(pdev); + if (bw < 0) + return 0; + bw /= BITS_PER_BYTE; + + return cxl_flit_size(pdev) * MEGA / bw; +} diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c index fc94f5240327127743336b47e9228f8871deb67d..e69625a8d6a1d7229b7be924d7b005f4e7f1f67a 100644 --- a/drivers/cxl/core/pmem.c +++ b/drivers/cxl/core/pmem.c @@ -64,14 +64,14 @@ static int match_nvdimm_bridge(struct device *dev, void *data) struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_memdev *cxlmd) { - struct cxl_port *port = find_cxl_root(cxlmd->endpoint); + struct cxl_root *cxl_root __free(put_cxl_root) = + find_cxl_root(cxlmd->endpoint); struct device *dev; - if (!port) + if (!cxl_root) return NULL; - dev = device_find_child(&port->dev, NULL, match_nvdimm_bridge); - put_device(&port->dev); + dev = device_find_child(&cxl_root->port.dev, NULL, match_nvdimm_bridge); if (!dev) return NULL; diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index
b7c93bb18f6e75adfb129e175be5afcba98b10de..e59d9d37aa65009c97326fa93ec869dee709804d 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -172,14 +173,10 @@ static ssize_t target_list_show(struct device *dev, { struct cxl_switch_decoder *cxlsd = to_cxl_switch_decoder(dev); ssize_t offset; - unsigned int seq; int rc; - do { - seq = read_seqbegin(&cxlsd->target_lock); - rc = emit_target_list(cxlsd, buf); - } while (read_seqretry(&cxlsd->target_lock, seq)); - + guard(rwsem_read)(&cxl_region_rwsem); + rc = emit_target_list(cxlsd, buf); if (rc < 0) return rc; offset = rc; @@ -541,7 +538,10 @@ static void cxl_port_release(struct device *dev) xa_destroy(&port->dports); xa_destroy(&port->regions); ida_free(&cxl_port_ida, port->id); - kfree(port); + if (is_cxl_root(port)) + kfree(to_cxl_root(port)); + else + kfree(port); } static ssize_t decoders_committed_show(struct device *dev, @@ -669,17 +669,31 @@ static struct lock_class_key cxl_port_key; static struct cxl_port *cxl_port_alloc(struct device *uport_dev, struct cxl_dport *parent_dport) { - struct cxl_port *port; + struct cxl_root *cxl_root __free(kfree) = NULL; + struct cxl_port *port, *_port __free(kfree) = NULL; struct device *dev; int rc; - port = kzalloc(sizeof(*port), GFP_KERNEL); - if (!port) - return ERR_PTR(-ENOMEM); + /* No parent_dport, root cxl_port */ + if (!parent_dport) { + cxl_root = kzalloc(sizeof(*cxl_root), GFP_KERNEL); + if (!cxl_root) + return ERR_PTR(-ENOMEM); + } else { + _port = kzalloc(sizeof(*port), GFP_KERNEL); + if (!_port) + return ERR_PTR(-ENOMEM); + } rc = ida_alloc(&cxl_port_ida, GFP_KERNEL); if (rc < 0) - goto err; + return ERR_PTR(rc); + + if (cxl_root) + port = &no_free_ptr(cxl_root)->port; + else + port = no_free_ptr(_port); + port->id = rc; port->uport_dev = uport_dev; @@ -731,10 +745,6 @@ static struct cxl_port *cxl_port_alloc(struct device *uport_dev, dev->type = &cxl_port_type; return 
port; - -err: - kfree(port); - return ERR_PTR(rc); } static int cxl_setup_comp_regs(struct device *host, struct cxl_register_map *map, @@ -841,6 +851,9 @@ static struct cxl_port *__devm_cxl_add_port(struct device *host, if (rc) return ERR_PTR(rc); + if (parent_dport && dev_is_pci(uport_dev)) + port->pci_latency = cxl_pci_get_latency(to_pci_dev(uport_dev)); + return port; err: @@ -884,6 +897,22 @@ struct cxl_port *devm_cxl_add_port(struct device *host, } EXPORT_SYMBOL_NS_GPL(devm_cxl_add_port, CXL); +struct cxl_root *devm_cxl_add_root(struct device *host, + const struct cxl_root_ops *ops) +{ + struct cxl_root *cxl_root; + struct cxl_port *port; + + port = devm_cxl_add_port(host, host, CXL_RESOURCE_NONE, NULL); + if (IS_ERR(port)) + return (struct cxl_root *)port; + + cxl_root = to_cxl_root(port); + cxl_root->ops = ops; + return cxl_root; +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_add_root, CXL); + struct pci_bus *cxl_port_to_pci_bus(struct cxl_port *port) { /* There is no pci_bus associated with a CXL platform-root port */ @@ -939,7 +968,7 @@ static bool dev_is_cxl_root_child(struct device *dev) return false; } -struct cxl_port *find_cxl_root(struct cxl_port *port) +struct cxl_root *find_cxl_root(struct cxl_port *port) { struct cxl_port *iter = port; @@ -949,10 +978,19 @@ struct cxl_port *find_cxl_root(struct cxl_port *port) if (!iter) return NULL; get_device(&iter->dev); - return iter; + return to_cxl_root(iter); } EXPORT_SYMBOL_NS_GPL(find_cxl_root, CXL); +void put_cxl_root(struct cxl_root *cxl_root) +{ + if (!cxl_root) + return; + + put_device(&cxl_root->port.dev); +} +EXPORT_SYMBOL_NS_GPL(put_cxl_root, CXL); + static struct cxl_dport *find_dport(struct cxl_port *port, int id) { struct cxl_dport *dport; @@ -1108,6 +1146,9 @@ __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev, if (rc) return ERR_PTR(rc); + if (dev_is_pci(dport_dev)) + dport->link_latency = cxl_pci_get_latency(to_pci_dev(dport_dev)); + return dport; } @@ -1633,7 +1674,7 @@ 
EXPORT_SYMBOL_NS_GPL(cxl_mem_find_port, CXL); static int decoder_populate_targets(struct cxl_switch_decoder *cxlsd, struct cxl_port *port, int *target_map) { - int i, rc = 0; + int i; if (!target_map) return 0; @@ -1643,19 +1684,16 @@ static int decoder_populate_targets(struct cxl_switch_decoder *cxlsd, if (xa_empty(&port->dports)) return -EINVAL; - write_seqlock(&cxlsd->target_lock); - for (i = 0; i < cxlsd->nr_targets; i++) { + guard(rwsem_write)(&cxl_region_rwsem); + for (i = 0; i < cxlsd->cxld.interleave_ways; i++) { struct cxl_dport *dport = find_dport(port, target_map[i]); - if (!dport) { - rc = -ENXIO; - break; - } + if (!dport) + return -ENXIO; cxlsd->target[i] = dport; } - write_sequnlock(&cxlsd->target_lock); - return rc; + return 0; } struct cxl_dport *cxl_hb_modulo(struct cxl_root_decoder *cxlrd, int pos) @@ -1725,7 +1763,6 @@ static int cxl_switch_decoder_init(struct cxl_port *port, return -EINVAL; cxlsd->nr_targets = nr_targets; - seqlock_init(&cxlsd->target_lock); return cxl_decoder_init(port, &cxlsd->cxld); } @@ -2059,6 +2096,80 @@ bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd) } EXPORT_SYMBOL_NS_GPL(schedule_cxl_memdev_detach, CXL); +static void combine_coordinates(struct access_coordinate *c1, + struct access_coordinate *c2) +{ + if (c2->write_bandwidth) + c1->write_bandwidth = min(c1->write_bandwidth, + c2->write_bandwidth); + c1->write_latency += c2->write_latency; + + if (c2->read_bandwidth) + c1->read_bandwidth = min(c1->read_bandwidth, + c2->read_bandwidth); + c1->read_latency += c2->read_latency; +} + +/** + * cxl_endpoint_get_perf_coordinates - Retrieve performance numbers stored in dports + * of CXL path + * @port: endpoint cxl_port + * @coord: output performance data + * + * Return: errno on failure, 0 on success. 
+ */ +int cxl_endpoint_get_perf_coordinates(struct cxl_port *port, + struct access_coordinate *coord) +{ + struct access_coordinate c = { + .read_bandwidth = UINT_MAX, + .write_bandwidth = UINT_MAX, + }; + struct cxl_port *iter = port; + struct cxl_dport *dport; + struct pci_dev *pdev; + unsigned int bw; + + if (!is_cxl_endpoint(port)) + return -EINVAL; + + dport = iter->parent_dport; + + /* + * Exit the loop when the parent port of the current port is cxl root. + * The iterative loop starts at the endpoint and gathers the + * latency of the CXL link from the current iter to the next downstream + * port each iteration. If the parent is cxl root then there is + * nothing to gather. + */ + while (iter && !is_cxl_root(to_cxl_port(iter->dev.parent))) { + combine_coordinates(&c, &dport->sw_coord); + c.write_latency += dport->link_latency; + c.read_latency += dport->link_latency; + + iter = to_cxl_port(iter->dev.parent); + dport = iter->parent_dport; + } + + /* Augment with the generic port (host bridge) perf data */ + combine_coordinates(&c, &dport->hb_coord); + + /* Get the calculated PCI paths bandwidth */ + pdev = to_pci_dev(port->uport_dev->parent); + bw = pcie_bandwidth_available(pdev, NULL, NULL, NULL); + if (bw == 0) + return -ENXIO; + bw /= BITS_PER_BYTE; + + c.write_bandwidth = min(c.write_bandwidth, bw); + c.read_bandwidth = min(c.read_bandwidth, bw); + + *coord = c; + + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_endpoint_get_perf_coordinates, CXL); + /* for user tooling to ensure port disable work has completed */ static ssize_t flush_store(const struct bus_type *bus, const char *buf, size_t count) { diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 3e817a6f94c6a4d2ac5113558a6c7633f7120821..0f05692bfec3946841a766c8583bc1a3b526073e 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -397,7 +397,7 @@ static ssize_t interleave_ways_store(struct device *dev, return rc; /* - * Even for x3, x9, and x12 interleaves the region 
interleave must be a + * Even for x3, x6, and x12 interleaves the region interleave must be a * power of 2 multiple of the host bridge interleave. */ if (!is_power_of_2(val / cxld->interleave_ways) || @@ -552,8 +552,9 @@ static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size) res = alloc_free_mem_region(cxlrd->res, size, SZ_256M, dev_name(&cxlr->dev)); if (IS_ERR(res)) { - dev_dbg(&cxlr->dev, "failed to allocate HPA: %ld\n", - PTR_ERR(res)); + dev_dbg(&cxlr->dev, + "HPA allocation error (%ld) for size:%pap in %s %pr\n", + PTR_ERR(res), &size, cxlrd->res->name, cxlrd->res); return PTR_ERR(res); } @@ -2083,13 +2084,13 @@ static struct cxl_region *to_cxl_region(struct device *dev) return container_of(dev, struct cxl_region, dev); } -static void unregister_region(void *dev) +static void unregister_region(void *_cxlr) { - struct cxl_region *cxlr = to_cxl_region(dev); + struct cxl_region *cxlr = _cxlr; struct cxl_region_params *p = &cxlr->params; int i; - device_del(dev); + device_del(&cxlr->dev); /* * Now that region sysfs is shutdown, the parameter block is now @@ -2100,7 +2101,7 @@ static void unregister_region(void *dev) detach_target(cxlr, i); cxl_region_iomem_release(cxlr); - put_device(dev); + put_device(&cxlr->dev); } static struct lock_class_key cxl_region_key; diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h index a0b5819bc70b3075b37d5452da0ac53efbf382f8..89445435303aac4d043c964a0ada866548889917 100644 --- a/drivers/cxl/core/trace.h +++ b/drivers/cxl/core/trace.h @@ -181,6 +181,7 @@ TRACE_EVENT(cxl_overflow, * 1) Add CXL_EVT_TP_entry to TP_STRUCT__entry * 2) Use CXL_EVT_TP_fast_assign within TP_fast_assign; * pass the dev, log, and CXL event header + * NOTE: The uuid must be assigned by the specific trace event * 3) Use CXL_EVT_TP_printk() instead of TP_printk() * * See the generic_event tracepoint as an example. 
@@ -203,7 +204,6 @@ TRACE_EVENT(cxl_overflow, __assign_str(host, dev_name((cxlmd)->dev.parent)); \ __entry->log = (l); \ __entry->serial = (cxlmd)->cxlds->serial; \ - memcpy(&__entry->hdr_uuid, &(hdr).id, sizeof(uuid_t)); \ __entry->hdr_length = (hdr).length; \ __entry->hdr_flags = get_unaligned_le24((hdr).flags); \ __entry->hdr_handle = le16_to_cpu((hdr).handle); \ @@ -225,9 +225,9 @@ TRACE_EVENT(cxl_overflow, TRACE_EVENT(cxl_generic_event, TP_PROTO(const struct cxl_memdev *cxlmd, enum cxl_event_log_type log, - struct cxl_event_record_raw *rec), + const uuid_t *uuid, struct cxl_event_generic *gen_rec), - TP_ARGS(cxlmd, log, rec), + TP_ARGS(cxlmd, log, uuid, gen_rec), TP_STRUCT__entry( CXL_EVT_TP_entry @@ -235,8 +235,9 @@ TRACE_EVENT(cxl_generic_event, ), TP_fast_assign( - CXL_EVT_TP_fast_assign(cxlmd, log, rec->hdr); - memcpy(__entry->data, &rec->data, CXL_EVENT_RECORD_DATA_LENGTH); + CXL_EVT_TP_fast_assign(cxlmd, log, gen_rec->hdr); + memcpy(&__entry->hdr_uuid, uuid, sizeof(uuid_t)); + memcpy(__entry->data, gen_rec->data, CXL_EVENT_RECORD_DATA_LENGTH); ), CXL_EVT_TP_printk("%s", @@ -337,6 +338,7 @@ TRACE_EVENT(cxl_general_media, TP_fast_assign( CXL_EVT_TP_fast_assign(cxlmd, log, rec->hdr); + memcpy(&__entry->hdr_uuid, &CXL_EVENT_GEN_MEDIA_UUID, sizeof(uuid_t)); /* General Media */ __entry->dpa = le64_to_cpu(rec->phys_addr); @@ -423,6 +425,7 @@ TRACE_EVENT(cxl_dram, TP_fast_assign( CXL_EVT_TP_fast_assign(cxlmd, log, rec->hdr); + memcpy(&__entry->hdr_uuid, &CXL_EVENT_DRAM_UUID, sizeof(uuid_t)); /* DRAM */ __entry->dpa = le64_to_cpu(rec->phys_addr); @@ -570,6 +573,7 @@ TRACE_EVENT(cxl_memory_module, TP_fast_assign( CXL_EVT_TP_fast_assign(cxlmd, log, rec->hdr); + memcpy(&__entry->hdr_uuid, &CXL_EVENT_MEM_MODULE_UUID, sizeof(uuid_t)); /* Memory Module Event */ __entry->event_type = rec->event_type; diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 687043ece1018c41c256c02cd697749d7916a42f..b6017c0c57b4d5e69dfe45011b7a8b3f5bf0b913 100644 --- a/drivers/cxl/cxl.h 
+++ b/drivers/cxl/cxl.h @@ -8,6 +8,7 @@ #include #include #include +#include #include /** @@ -412,7 +413,6 @@ struct cxl_endpoint_decoder { /** * struct cxl_switch_decoder - Switch specific CXL HDM Decoder * @cxld: base cxl_decoder object - * @target_lock: coordinate coherent reads of the target list * @nr_targets: number of elements in @target * @target: active ordered target list in current decoder configuration * @@ -424,7 +424,6 @@ struct cxl_endpoint_decoder { */ struct cxl_switch_decoder { struct cxl_decoder cxld; - seqlock_t target_lock; int nr_targets; struct cxl_dport *target[]; }; @@ -590,6 +589,7 @@ struct cxl_dax_region { * @depth: How deep this port is relative to the root. depth 0 is the root. * @cdat: Cached CDAT data * @cdat_available: Should a CDAT attribute be available in sysfs + * @pci_latency: Upstream latency in picoseconds */ struct cxl_port { struct device dev; @@ -612,6 +612,30 @@ struct cxl_port { size_t length; } cdat; bool cdat_available; + long pci_latency; +}; + +/** + * struct cxl_root - logical collection of root cxl_port items + * + * @port: cxl_port member + * @ops: cxl root operations + */ +struct cxl_root { + struct cxl_port port; + const struct cxl_root_ops *ops; +}; + +static inline struct cxl_root * +to_cxl_root(const struct cxl_port *port) +{ + return container_of(port, struct cxl_root, port); +} + +struct cxl_root_ops { + int (*qos_class)(struct cxl_root *cxl_root, + struct access_coordinate *coord, int entries, + int *qos_class); }; static inline struct cxl_dport * @@ -634,6 +658,9 @@ struct cxl_rcrb_info { * @rch: Indicate whether this dport was enumerated in RCH or VH mode * @port: reference to cxl_port that contains this downstream port * @regs: Dport parsed register blocks + * @sw_coord: access coordinates (performance) for switch from CDAT + * @hb_coord: access coordinates (performance) from ACPI generic port (host bridge) + * @link_latency: calculated PCIe downstream latency */ struct cxl_dport { struct device 
*dport_dev; @@ -643,6 +670,9 @@ struct cxl_dport { bool rch; struct cxl_port *port; struct cxl_regs regs; + struct access_coordinate sw_coord; + struct access_coordinate hb_coord; + long link_latency; }; /** @@ -700,7 +730,12 @@ struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport_dev, resource_size_t component_reg_phys, struct cxl_dport *parent_dport); -struct cxl_port *find_cxl_root(struct cxl_port *port); +struct cxl_root *devm_cxl_add_root(struct device *host, + const struct cxl_root_ops *ops); +struct cxl_root *find_cxl_root(struct cxl_port *port); +void put_cxl_root(struct cxl_root *cxl_root); +DEFINE_FREE(put_cxl_root, struct cxl_root *, if (_T) put_cxl_root(_T)) + int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd); void cxl_bus_rescan(void); void cxl_bus_drain(void); @@ -839,6 +874,12 @@ static inline struct cxl_dax_region *to_cxl_dax_region(struct device *dev) } #endif +void cxl_endpoint_parse_cdat(struct cxl_port *port); +void cxl_switch_parse_cdat(struct cxl_port *port); + +int cxl_endpoint_get_perf_coordinates(struct cxl_port *port, + struct access_coordinate *coord); + /* * Unit test builds overrides this to __weak, find the 'strong' version * of these symbols in tools/testing/cxl/. diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index a2fcbca253f3983a6c4bfb1b2f964314e1b250d6..5303d6942b880af65dcf8e77b02d26626c2bb94d 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -6,6 +6,8 @@ #include #include #include +#include +#include #include "cxl.h" /* CXL 2.0 8.2.8.5.1.1 Memory Device Status Register */ @@ -391,6 +393,20 @@ enum cxl_devtype { CXL_DEVTYPE_CLASSMEM, }; +/** + * struct cxl_dpa_perf - DPA performance property entry + * @list: list entry + * @dpa_range: range for DPA address + * @coord: QoS performance data (i.e.
latency, bandwidth) + * @qos_class: QoS Class cookies + */ +struct cxl_dpa_perf { + struct list_head list; + struct range dpa_range; + struct access_coordinate coord; + int qos_class; +}; + /** * struct cxl_dev_state - The driver device state * @@ -455,6 +471,8 @@ struct cxl_dev_state { * @security: security driver state info * @fw: firmware upload / activation state * @mbox_send: @dev specific transport for transmitting mailbox commands + * @ram_perf_list: performance data entries matched to RAM + * @pmem_perf_list: performance data entries matched to PMEM * * See CXL 3.0 8.2.9.8.2 Capacity Configuration and Label Storage for * details on capacity parameters. @@ -475,6 +493,10 @@ struct cxl_memdev_state { u64 active_persistent_bytes; u64 next_volatile_bytes; u64 next_persistent_bytes; + + struct list_head ram_perf_list; + struct list_head pmem_perf_list; + struct cxl_event_state event; struct cxl_poison_state poison; struct cxl_security_state security; @@ -503,6 +525,7 @@ enum cxl_opcode { CXL_MBOX_OP_GET_FW_INFO = 0x0200, CXL_MBOX_OP_TRANSFER_FW = 0x0201, CXL_MBOX_OP_ACTIVATE_FW = 0x0202, + CXL_MBOX_OP_GET_TIMESTAMP = 0x0300, CXL_MBOX_OP_SET_TIMESTAMP = 0x0301, CXL_MBOX_OP_GET_SUPPORTED_LOGS = 0x0400, CXL_MBOX_OP_GET_LOG = 0x0401, @@ -580,25 +603,28 @@ struct cxl_mbox_identify { } __packed; /* - * Common Event Record Format - * CXL rev 3.0 section 8.2.9.2.1; Table 8-42 + * General Media Event Record UUID + * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43 */ -struct cxl_event_record_hdr { - uuid_t id; - u8 length; - u8 flags[3]; - __le16 handle; - __le16 related_handle; - __le64 timestamp; - u8 maint_op_class; - u8 reserved[15]; -} __packed; +#define CXL_EVENT_GEN_MEDIA_UUID \ + UUID_INIT(0xfbcd0a77, 0xc260, 0x417f, 0x85, 0xa9, 0x08, 0x8b, 0x16, \ + 0x21, 0xeb, 0xa6) -#define CXL_EVENT_RECORD_DATA_LENGTH 0x50 -struct cxl_event_record_raw { - struct cxl_event_record_hdr hdr; - u8 data[CXL_EVENT_RECORD_DATA_LENGTH]; -} __packed; +/* + * DRAM Event Record UUID + * CXL
rev 3.0 section 8.2.9.2.1.2; Table 8-44 + */ +#define CXL_EVENT_DRAM_UUID \ + UUID_INIT(0x601dcbb3, 0x9c06, 0x4eab, 0xb8, 0xaf, 0x4e, 0x9b, 0xfb, \ + 0x5c, 0x96, 0x24) + +/* + * Memory Module Event Record UUID + * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45 + */ +#define CXL_EVENT_MEM_MODULE_UUID \ + UUID_INIT(0xfe927475, 0xdd59, 0x4339, 0xa5, 0x86, 0x79, 0xba, 0xb1, \ + 0x13, 0xb7, 0x74) /* * Get Event Records output payload @@ -641,74 +667,6 @@ struct cxl_mbox_clear_event_payload { } __packed; #define CXL_CLEAR_EVENT_MAX_HANDLES U8_MAX -/* - * General Media Event Record - * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43 - */ -#define CXL_EVENT_GEN_MED_COMP_ID_SIZE 0x10 -struct cxl_event_gen_media { - struct cxl_event_record_hdr hdr; - __le64 phys_addr; - u8 descriptor; - u8 type; - u8 transaction_type; - u8 validity_flags[2]; - u8 channel; - u8 rank; - u8 device[3]; - u8 component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE]; - u8 reserved[46]; -} __packed; - -/* - * DRAM Event Record - DER - * CXL rev 3.0 section 8.2.9.2.1.2; Table 3-44 - */ -#define CXL_EVENT_DER_CORRECTION_MASK_SIZE 0x20 -struct cxl_event_dram { - struct cxl_event_record_hdr hdr; - __le64 phys_addr; - u8 descriptor; - u8 type; - u8 transaction_type; - u8 validity_flags[2]; - u8 channel; - u8 rank; - u8 nibble_mask[3]; - u8 bank_group; - u8 bank; - u8 row[3]; - u8 column[2]; - u8 correction_mask[CXL_EVENT_DER_CORRECTION_MASK_SIZE]; - u8 reserved[0x17]; -} __packed; - -/* - * Get Health Info Record - * CXL rev 3.0 section 8.2.9.8.3.1; Table 8-100 - */ -struct cxl_get_health_info { - u8 health_status; - u8 media_status; - u8 add_status; - u8 life_used; - u8 device_temp[2]; - u8 dirty_shutdown_cnt[4]; - u8 cor_vol_err_cnt[4]; - u8 cor_per_err_cnt[4]; -} __packed; - -/* - * Memory Module Event Record - * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45 - */ -struct cxl_event_mem_module { - struct cxl_event_record_hdr hdr; - u8 event_type; - struct cxl_get_health_info info; - u8 reserved[0x3d]; -} __packed; - struct 
cxl_mbox_get_partition_info { __le64 active_volatile_cap; __le64 active_persistent_cap; @@ -866,6 +824,10 @@ void set_exclusive_cxl_commands(struct cxl_memdev_state *mds, void clear_exclusive_cxl_commands(struct cxl_memdev_state *mds, unsigned long *cmds); void cxl_mem_get_event_records(struct cxl_memdev_state *mds, u32 status); +void cxl_event_trace_record(const struct cxl_memdev *cxlmd, + enum cxl_event_log_type type, + enum cxl_event_type event_type, + const uuid_t *uuid, union cxl_event *evt); int cxl_set_timestamp(struct cxl_memdev_state *mds); int cxl_poison_state_init(struct cxl_memdev_state *mds); int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len, diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index 0fa4799ea316cd802c97feda11fb6e3c88aaa138..711b05d9a370e91b49beffbc5494542d8861d31e 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -85,6 +85,19 @@ struct cdat_entry_header { __le16 length; } __packed; +/* + * CXL v3.0 6.2.3 Table 6-4 + * The table indicates that if PCIe Flit Mode is set, then CXL is in 256B flits + * mode, otherwise it's 68B flits mode. 
+ */ +static inline bool cxl_pci_flit_256(struct pci_dev *pdev) +{ + u16 lnksta2; + + pcie_capability_read_word(pdev, PCI_EXP_LNKSTA2, &lnksta2); + return lnksta2 & PCI_EXP_LNKSTA2_FLIT; +} + int devm_cxl_port_enumerate_dports(struct cxl_port *port); struct cxl_dev_state; int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm, diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index e087febf9af047c81dfb11b06d91d92d442d586c..c5c9d8e0d88d69fcc9f031e1bd46ba7c44de4fd4 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -215,23 +215,78 @@ static ssize_t trigger_poison_list_store(struct device *dev, } static DEVICE_ATTR_WO(trigger_poison_list); +static ssize_t ram_qos_class_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cxl_memdev *cxlmd = to_cxl_memdev(dev); + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); + struct cxl_dpa_perf *dpa_perf; + + if (!dev->driver) + return -ENOENT; + + if (list_empty(&mds->ram_perf_list)) + return -ENOENT; + + dpa_perf = list_first_entry(&mds->ram_perf_list, struct cxl_dpa_perf, + list); + + return sysfs_emit(buf, "%d\n", dpa_perf->qos_class); +} + +static struct device_attribute dev_attr_ram_qos_class = + __ATTR(qos_class, 0444, ram_qos_class_show, NULL); + +static ssize_t pmem_qos_class_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cxl_memdev *cxlmd = to_cxl_memdev(dev); + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); + struct cxl_dpa_perf *dpa_perf; + + if (!dev->driver) + return -ENOENT; + + if (list_empty(&mds->pmem_perf_list)) + return -ENOENT; + + dpa_perf = list_first_entry(&mds->pmem_perf_list, struct cxl_dpa_perf, + list); + + return sysfs_emit(buf, "%d\n", dpa_perf->qos_class); +} + +static struct device_attribute dev_attr_pmem_qos_class = + __ATTR(qos_class, 0444, pmem_qos_class_show, NULL); + static umode_t 
cxl_mem_visible(struct kobject *kobj, struct attribute *a, int n) { - if (a == &dev_attr_trigger_poison_list.attr) { - struct device *dev = kobj_to_dev(kobj); - struct cxl_memdev *cxlmd = to_cxl_memdev(dev); - struct cxl_memdev_state *mds = - to_cxl_memdev_state(cxlmd->cxlds); + struct device *dev = kobj_to_dev(kobj); + struct cxl_memdev *cxlmd = to_cxl_memdev(dev); + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); + if (a == &dev_attr_trigger_poison_list.attr) if (!test_bit(CXL_POISON_ENABLED_LIST, mds->poison.enabled_cmds)) return 0; - } + + if (a == &dev_attr_pmem_qos_class.attr) + if (list_empty(&mds->pmem_perf_list)) + return 0; + + if (a == &dev_attr_ram_qos_class.attr) + if (list_empty(&mds->ram_perf_list)) + return 0; + return a->mode; } static struct attribute *cxl_mem_attrs[] = { &dev_attr_trigger_poison_list.attr, + &dev_attr_ram_qos_class.attr, + &dev_attr_pmem_qos_class.attr, NULL }; diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 0155fb66b580d7f939e3f2d92a34b5c3c8a89586..4fd1f207c84ee53a857e417a5fc2260fb43b9733 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright(c) 2020 Intel Corporation. All rights reserved. 
*/ +#include #include #include #include @@ -969,6 +970,61 @@ static struct pci_driver cxl_pci_driver = { }, }; +#define CXL_EVENT_HDR_FLAGS_REC_SEVERITY GENMASK(1, 0) +static void cxl_cper_event_call(enum cxl_event_type ev_type, + struct cxl_cper_event_rec *rec) +{ + struct cper_cxl_event_devid *device_id = &rec->hdr.device_id; + struct pci_dev *pdev __free(pci_dev_put) = NULL; + enum cxl_event_log_type log_type; + struct cxl_dev_state *cxlds; + unsigned int devfn; + u32 hdr_flags; + + devfn = PCI_DEVFN(device_id->device_num, device_id->func_num); + pdev = pci_get_domain_bus_and_slot(device_id->segment_num, + device_id->bus_num, devfn); + if (!pdev) + return; + + guard(pci_dev)(pdev); + if (pdev->driver != &cxl_pci_driver) + return; + + cxlds = pci_get_drvdata(pdev); + if (!cxlds) + return; + + /* Fabricate a log type */ + hdr_flags = get_unaligned_le24(rec->event.generic.hdr.flags); + log_type = FIELD_GET(CXL_EVENT_HDR_FLAGS_REC_SEVERITY, hdr_flags); + + cxl_event_trace_record(cxlds->cxlmd, log_type, ev_type, + &uuid_null, &rec->event); +} + +static int __init cxl_pci_driver_init(void) +{ + int rc; + + rc = cxl_cper_register_callback(cxl_cper_event_call); + if (rc) + return rc; + + rc = pci_register_driver(&cxl_pci_driver); + if (rc) + cxl_cper_unregister_callback(cxl_cper_event_call); + + return rc; +} + +static void __exit cxl_pci_driver_exit(void) +{ + pci_unregister_driver(&cxl_pci_driver); + cxl_cper_unregister_callback(cxl_cper_event_call); +} + +module_init(cxl_pci_driver_init); +module_exit(cxl_pci_driver_exit); MODULE_LICENSE("GPL v2"); -module_pci_driver(cxl_pci_driver); MODULE_IMPORT_NS(CXL); diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c index 47bc8e0b859077776c06fc1daee901ab49cbdd2d..97c21566677aa3b4dbdd84d7d656198d53af63fd 100644 --- a/drivers/cxl/port.c +++ b/drivers/cxl/port.c @@ -69,6 +69,8 @@ static int cxl_switch_port_probe(struct cxl_port *port) if (rc < 0) return rc; + cxl_switch_parse_cdat(port); + cxlhdm = devm_cxl_setup_hdm(port, 
NULL); if (!IS_ERR(cxlhdm)) return devm_cxl_enumerate_decoders(cxlhdm, NULL); @@ -109,6 +111,7 @@ static int cxl_endpoint_port_probe(struct cxl_port *port) /* Cache the data early to ensure is_visible() works */ read_cdat_data(port); + cxl_endpoint_parse_cdat(port); get_device(&cxlmd->dev); rc = devm_add_action_or_reset(&port->dev, schedule_detach, cxlmd); @@ -127,14 +130,15 @@ static int cxl_endpoint_port_probe(struct cxl_port *port) * This can't fail in practice as CXL root exit unregisters all * descendant ports and that in turn synchronizes with cxl_port_probe() */ - root = find_cxl_root(port); + struct cxl_root *cxl_root __free(put_cxl_root) = find_cxl_root(port); + + root = &cxl_root->port; /* * Now that all endpoint decoders are successfully enumerated, try to * assemble regions from committed decoders */ device_for_each_child(&port->dev, root, discover_region); - put_device(&root->dev); return 0; } diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 70ba506dabab5f7aa9eb95741bd40636d1535c86..e928f2ca0f1e9adc58594d6b4552d64dae29df34 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -378,6 +378,20 @@ config LPC18XX_DMAMUX Enable support for DMA on NXP LPC18xx/43xx platforms with PL080 and multiplexed DMA request lines. +config LS2X_APB_DMA + tristate "Loongson LS2X APB DMA support" + depends on LOONGARCH || COMPILE_TEST + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Support for the Loongson LS2X APB DMA controller driver. The + DMA controller has a single DMA channel which can be + configured for different peripherals on the APB bus, such as + audio, NAND and SDIO. + + This DMA controller transfers data from memory to peripheral FIFO. + It does not support memory-to-memory data transfer.
+ config MCF_EDMA tristate "Freescale eDMA engine support, ColdFire mcf5441x SoCs" depends on M5441x || COMPILE_TEST diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index 83553a97a010e157b285ad6c47557df31c8bc835..dfd40d14e4089d81ce489429f141a896bfc549e7 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -48,6 +48,7 @@ obj-$(CONFIG_INTEL_IOATDMA) += ioat/ obj-y += idxd/ obj-$(CONFIG_K3_DMA) += k3dma.o obj-$(CONFIG_LPC18XX_DMAMUX) += lpc18xx-dmamux.o +obj-$(CONFIG_LS2X_APB_DMA) += ls2x-apb-dma.o obj-$(CONFIG_MILBEAUT_HDMAC) += milbeaut-hdmac.o obj-$(CONFIG_MILBEAUT_XDMAC) += milbeaut-xdmac.o obj-$(CONFIG_MMP_PDMA) += mmp_pdma.o diff --git a/drivers/dma/apple-admac.c b/drivers/dma/apple-admac.c index 5b63996640d9d3a210f789a89b6e7ffa054d96e2..9588773dd2eb670a2f6115fdaef39a0e88248015 100644 --- a/drivers/dma/apple-admac.c +++ b/drivers/dma/apple-admac.c @@ -57,6 +57,8 @@ #define REG_BUS_WIDTH(ch) (0x8040 + (ch) * 0x200) +#define BUS_WIDTH_WORD_SIZE GENMASK(3, 0) +#define BUS_WIDTH_FRAME_SIZE GENMASK(7, 4) #define BUS_WIDTH_8BIT 0x00 #define BUS_WIDTH_16BIT 0x01 #define BUS_WIDTH_32BIT 0x02 @@ -740,7 +742,8 @@ static int admac_device_config(struct dma_chan *chan, struct admac_data *ad = adchan->host; bool is_tx = admac_chan_direction(adchan->no) == DMA_MEM_TO_DEV; int wordsize = 0; - u32 bus_width = 0; + u32 bus_width = readl_relaxed(ad->base + REG_BUS_WIDTH(adchan->no)) & + ~(BUS_WIDTH_WORD_SIZE | BUS_WIDTH_FRAME_SIZE); switch (is_tx ? 
config->dst_addr_width : config->src_addr_width) { case DMA_SLAVE_BUSWIDTH_1_BYTE: diff --git a/drivers/dma/dma-axi-dmac.c b/drivers/dma/dma-axi-dmac.c index 2457a420c13d72cde2509f6a446269b166b06cdc..4e339c04fc1ea1e973a385250a144945984ae1fd 100644 --- a/drivers/dma/dma-axi-dmac.c +++ b/drivers/dma/dma-axi-dmac.c @@ -81,9 +81,13 @@ #define AXI_DMAC_REG_CURRENT_DEST_ADDR 0x438 #define AXI_DMAC_REG_PARTIAL_XFER_LEN 0x44c #define AXI_DMAC_REG_PARTIAL_XFER_ID 0x450 +#define AXI_DMAC_REG_CURRENT_SG_ID 0x454 +#define AXI_DMAC_REG_SG_ADDRESS 0x47c +#define AXI_DMAC_REG_SG_ADDRESS_HIGH 0x4bc #define AXI_DMAC_CTRL_ENABLE BIT(0) #define AXI_DMAC_CTRL_PAUSE BIT(1) +#define AXI_DMAC_CTRL_ENABLE_SG BIT(2) #define AXI_DMAC_IRQ_SOT BIT(0) #define AXI_DMAC_IRQ_EOT BIT(1) @@ -97,20 +101,35 @@ /* The maximum ID allocated by the hardware is 31 */ #define AXI_DMAC_SG_UNUSED 32U +/* Flags for axi_dmac_hw_desc.flags */ +#define AXI_DMAC_HW_FLAG_LAST BIT(0) +#define AXI_DMAC_HW_FLAG_IRQ BIT(1) + +struct axi_dmac_hw_desc { + u32 flags; + u32 id; + u64 dest_addr; + u64 src_addr; + u64 next_sg_addr; + u32 y_len; + u32 x_len; + u32 src_stride; + u32 dst_stride; + u64 __pad[2]; +}; + struct axi_dmac_sg { - dma_addr_t src_addr; - dma_addr_t dest_addr; - unsigned int x_len; - unsigned int y_len; - unsigned int dest_stride; - unsigned int src_stride; - unsigned int id; unsigned int partial_len; bool schedule_when_free; + + struct axi_dmac_hw_desc *hw; + dma_addr_t hw_phys; }; struct axi_dmac_desc { struct virt_dma_desc vdesc; + struct axi_dmac_chan *chan; + bool cyclic; bool have_partial_xfer; @@ -139,6 +158,7 @@ struct axi_dmac_chan { bool hw_partial_xfer; bool hw_cyclic; bool hw_2d; + bool hw_sg; }; struct axi_dmac { @@ -213,9 +233,11 @@ static void axi_dmac_start_transfer(struct axi_dmac_chan *chan) unsigned int flags = 0; unsigned int val; - val = axi_dmac_read(dmac, AXI_DMAC_REG_START_TRANSFER); - if (val) /* Queue is full, wait for the next SOT IRQ */ - return; + if (!chan->hw_sg) { + val = 
axi_dmac_read(dmac, AXI_DMAC_REG_START_TRANSFER); + if (val) /* Queue is full, wait for the next SOT IRQ */ + return; + } desc = chan->next_desc; @@ -229,14 +251,15 @@ static void axi_dmac_start_transfer(struct axi_dmac_chan *chan) sg = &desc->sg[desc->num_submitted]; /* Already queued in cyclic mode. Wait for it to finish */ - if (sg->id != AXI_DMAC_SG_UNUSED) { + if (sg->hw->id != AXI_DMAC_SG_UNUSED) { sg->schedule_when_free = true; return; } - desc->num_submitted++; - if (desc->num_submitted == desc->num_sgs || - desc->have_partial_xfer) { + if (chan->hw_sg) { + chan->next_desc = NULL; + } else if (++desc->num_submitted == desc->num_sgs || + desc->have_partial_xfer) { if (desc->cyclic) desc->num_submitted = 0; /* Start again */ else @@ -246,32 +269,42 @@ static void axi_dmac_start_transfer(struct axi_dmac_chan *chan) chan->next_desc = desc; } - sg->id = axi_dmac_read(dmac, AXI_DMAC_REG_TRANSFER_ID); + sg->hw->id = axi_dmac_read(dmac, AXI_DMAC_REG_TRANSFER_ID); - if (axi_dmac_dest_is_mem(chan)) { - axi_dmac_write(dmac, AXI_DMAC_REG_DEST_ADDRESS, sg->dest_addr); - axi_dmac_write(dmac, AXI_DMAC_REG_DEST_STRIDE, sg->dest_stride); - } + if (!chan->hw_sg) { + if (axi_dmac_dest_is_mem(chan)) { + axi_dmac_write(dmac, AXI_DMAC_REG_DEST_ADDRESS, sg->hw->dest_addr); + axi_dmac_write(dmac, AXI_DMAC_REG_DEST_STRIDE, sg->hw->dst_stride); + } - if (axi_dmac_src_is_mem(chan)) { - axi_dmac_write(dmac, AXI_DMAC_REG_SRC_ADDRESS, sg->src_addr); - axi_dmac_write(dmac, AXI_DMAC_REG_SRC_STRIDE, sg->src_stride); + if (axi_dmac_src_is_mem(chan)) { + axi_dmac_write(dmac, AXI_DMAC_REG_SRC_ADDRESS, sg->hw->src_addr); + axi_dmac_write(dmac, AXI_DMAC_REG_SRC_STRIDE, sg->hw->src_stride); + } } /* * If the hardware supports cyclic transfers and there is no callback to - * call and only a single segment, enable hw cyclic mode to avoid - * unnecessary interrupts. + * call, enable hw cyclic mode to avoid unnecessary interrupts. 
*/ - if (chan->hw_cyclic && desc->cyclic && !desc->vdesc.tx.callback && - desc->num_sgs == 1) - flags |= AXI_DMAC_FLAG_CYCLIC; + if (chan->hw_cyclic && desc->cyclic && !desc->vdesc.tx.callback) { + if (chan->hw_sg) + desc->sg[desc->num_sgs - 1].hw->flags &= ~AXI_DMAC_HW_FLAG_IRQ; + else if (desc->num_sgs == 1) + flags |= AXI_DMAC_FLAG_CYCLIC; + } if (chan->hw_partial_xfer) flags |= AXI_DMAC_FLAG_PARTIAL_REPORT; - axi_dmac_write(dmac, AXI_DMAC_REG_X_LENGTH, sg->x_len - 1); - axi_dmac_write(dmac, AXI_DMAC_REG_Y_LENGTH, sg->y_len - 1); + if (chan->hw_sg) { + axi_dmac_write(dmac, AXI_DMAC_REG_SG_ADDRESS, (u32)sg->hw_phys); + axi_dmac_write(dmac, AXI_DMAC_REG_SG_ADDRESS_HIGH, + (u64)sg->hw_phys >> 32); + } else { + axi_dmac_write(dmac, AXI_DMAC_REG_X_LENGTH, sg->hw->x_len); + axi_dmac_write(dmac, AXI_DMAC_REG_Y_LENGTH, sg->hw->y_len); + } axi_dmac_write(dmac, AXI_DMAC_REG_FLAGS, flags); axi_dmac_write(dmac, AXI_DMAC_REG_START_TRANSFER, 1); } @@ -286,9 +319,9 @@ static inline unsigned int axi_dmac_total_sg_bytes(struct axi_dmac_chan *chan, struct axi_dmac_sg *sg) { if (chan->hw_2d) - return sg->x_len * sg->y_len; + return (sg->hw->x_len + 1) * (sg->hw->y_len + 1); else - return sg->x_len; + return (sg->hw->x_len + 1); } static void axi_dmac_dequeue_partial_xfers(struct axi_dmac_chan *chan) @@ -307,9 +340,9 @@ static void axi_dmac_dequeue_partial_xfers(struct axi_dmac_chan *chan) list_for_each_entry(desc, &chan->active_descs, vdesc.node) { for (i = 0; i < desc->num_sgs; i++) { sg = &desc->sg[i]; - if (sg->id == AXI_DMAC_SG_UNUSED) + if (sg->hw->id == AXI_DMAC_SG_UNUSED) continue; - if (sg->id == id) { + if (sg->hw->id == id) { desc->have_partial_xfer = true; sg->partial_len = len; found_sg = true; @@ -348,6 +381,9 @@ static void axi_dmac_compute_residue(struct axi_dmac_chan *chan, rslt->result = DMA_TRANS_NOERROR; rslt->residue = 0; + if (chan->hw_sg) + return; + /* * We get here if the last completed segment is partial, which * means we can compute the residue from that 
segment onwards @@ -374,36 +410,47 @@ static bool axi_dmac_transfer_done(struct axi_dmac_chan *chan, (completed_transfers & AXI_DMAC_FLAG_PARTIAL_XFER_DONE)) axi_dmac_dequeue_partial_xfers(chan); - do { - sg = &active->sg[active->num_completed]; - if (sg->id == AXI_DMAC_SG_UNUSED) /* Not yet submitted */ - break; - if (!(BIT(sg->id) & completed_transfers)) - break; - active->num_completed++; - sg->id = AXI_DMAC_SG_UNUSED; - if (sg->schedule_when_free) { - sg->schedule_when_free = false; - start_next = true; + if (chan->hw_sg) { + if (active->cyclic) { + vchan_cyclic_callback(&active->vdesc); + } else { + list_del(&active->vdesc.node); + vchan_cookie_complete(&active->vdesc); + active = axi_dmac_active_desc(chan); + start_next = !!active; } + } else { + do { + sg = &active->sg[active->num_completed]; + if (sg->hw->id == AXI_DMAC_SG_UNUSED) /* Not yet submitted */ + break; + if (!(BIT(sg->hw->id) & completed_transfers)) + break; + active->num_completed++; + sg->hw->id = AXI_DMAC_SG_UNUSED; + if (sg->schedule_when_free) { + sg->schedule_when_free = false; + start_next = true; + } - if (sg->partial_len) - axi_dmac_compute_residue(chan, active); + if (sg->partial_len) + axi_dmac_compute_residue(chan, active); - if (active->cyclic) - vchan_cyclic_callback(&active->vdesc); + if (active->cyclic) + vchan_cyclic_callback(&active->vdesc); - if (active->num_completed == active->num_sgs || - sg->partial_len) { - if (active->cyclic) { - active->num_completed = 0; /* wrap around */ - } else { - list_del(&active->vdesc.node); - vchan_cookie_complete(&active->vdesc); - active = axi_dmac_active_desc(chan); + if (active->num_completed == active->num_sgs || + sg->partial_len) { + if (active->cyclic) { + active->num_completed = 0; /* wrap around */ + } else { + list_del(&active->vdesc.node); + vchan_cookie_complete(&active->vdesc); + active = axi_dmac_active_desc(chan); + } } - } - } while (active); + } while (active); + } return start_next; } @@ -467,8 +514,12 @@ static void 
axi_dmac_issue_pending(struct dma_chan *c) struct axi_dmac_chan *chan = to_axi_dmac_chan(c); struct axi_dmac *dmac = chan_to_axi_dmac(chan); unsigned long flags; + u32 ctrl = AXI_DMAC_CTRL_ENABLE; - axi_dmac_write(dmac, AXI_DMAC_REG_CTRL, AXI_DMAC_CTRL_ENABLE); + if (chan->hw_sg) + ctrl |= AXI_DMAC_CTRL_ENABLE_SG; + + axi_dmac_write(dmac, AXI_DMAC_REG_CTRL, ctrl); spin_lock_irqsave(&chan->vchan.lock, flags); if (vchan_issue_pending(&chan->vchan)) @@ -476,22 +527,58 @@ static void axi_dmac_issue_pending(struct dma_chan *c) spin_unlock_irqrestore(&chan->vchan.lock, flags); } -static struct axi_dmac_desc *axi_dmac_alloc_desc(unsigned int num_sgs) +static struct axi_dmac_desc * +axi_dmac_alloc_desc(struct axi_dmac_chan *chan, unsigned int num_sgs) { + struct axi_dmac *dmac = chan_to_axi_dmac(chan); + struct device *dev = dmac->dma_dev.dev; + struct axi_dmac_hw_desc *hws; struct axi_dmac_desc *desc; + dma_addr_t hw_phys; unsigned int i; desc = kzalloc(struct_size(desc, sg, num_sgs), GFP_NOWAIT); if (!desc) return NULL; desc->num_sgs = num_sgs; + desc->chan = chan; + + hws = dma_alloc_coherent(dev, PAGE_ALIGN(num_sgs * sizeof(*hws)), + &hw_phys, GFP_ATOMIC); + if (!hws) { + kfree(desc); + return NULL; + } - for (i = 0; i < num_sgs; i++) - desc->sg[i].id = AXI_DMAC_SG_UNUSED; + for (i = 0; i < num_sgs; i++) { + desc->sg[i].hw = &hws[i]; + desc->sg[i].hw_phys = hw_phys + i * sizeof(*hws); + + hws[i].id = AXI_DMAC_SG_UNUSED; + hws[i].flags = 0; + + /* Link hardware descriptors */ + hws[i].next_sg_addr = hw_phys + (i + 1) * sizeof(*hws); + } + + /* The last hardware descriptor will trigger an interrupt */ + desc->sg[num_sgs - 1].hw->flags = AXI_DMAC_HW_FLAG_LAST | AXI_DMAC_HW_FLAG_IRQ; return desc; } +static void axi_dmac_free_desc(struct axi_dmac_desc *desc) +{ + struct axi_dmac *dmac = chan_to_axi_dmac(desc->chan); + struct device *dev = dmac->dma_dev.dev; + struct axi_dmac_hw_desc *hw = desc->sg[0].hw; + dma_addr_t hw_phys = desc->sg[0].hw_phys; + + dma_free_coherent(dev, 
PAGE_ALIGN(desc->num_sgs * sizeof(*hw)), + hw, hw_phys); + kfree(desc); +} + static struct axi_dmac_sg *axi_dmac_fill_linear_sg(struct axi_dmac_chan *chan, enum dma_transfer_direction direction, dma_addr_t addr, unsigned int num_periods, unsigned int period_len, @@ -508,26 +595,24 @@ static struct axi_dmac_sg *axi_dmac_fill_linear_sg(struct axi_dmac_chan *chan, segment_size = ((segment_size - 1) | chan->length_align_mask) + 1; for (i = 0; i < num_periods; i++) { - len = period_len; - - while (len > segment_size) { + for (len = period_len; len > segment_size; sg++) { if (direction == DMA_DEV_TO_MEM) - sg->dest_addr = addr; + sg->hw->dest_addr = addr; else - sg->src_addr = addr; - sg->x_len = segment_size; - sg->y_len = 1; - sg++; + sg->hw->src_addr = addr; + sg->hw->x_len = segment_size - 1; + sg->hw->y_len = 0; + sg->hw->flags = 0; addr += segment_size; len -= segment_size; } if (direction == DMA_DEV_TO_MEM) - sg->dest_addr = addr; + sg->hw->dest_addr = addr; else - sg->src_addr = addr; - sg->x_len = len; - sg->y_len = 1; + sg->hw->src_addr = addr; + sg->hw->x_len = len - 1; + sg->hw->y_len = 0; sg++; addr += len; } @@ -554,7 +639,7 @@ static struct dma_async_tx_descriptor *axi_dmac_prep_slave_sg( for_each_sg(sgl, sg, sg_len, i) num_sgs += DIV_ROUND_UP(sg_dma_len(sg), chan->max_length); - desc = axi_dmac_alloc_desc(num_sgs); + desc = axi_dmac_alloc_desc(chan, num_sgs); if (!desc) return NULL; @@ -563,7 +648,7 @@ static struct dma_async_tx_descriptor *axi_dmac_prep_slave_sg( for_each_sg(sgl, sg, sg_len, i) { if (!axi_dmac_check_addr(chan, sg_dma_address(sg)) || !axi_dmac_check_len(chan, sg_dma_len(sg))) { - kfree(desc); + axi_dmac_free_desc(desc); return NULL; } @@ -583,7 +668,7 @@ static struct dma_async_tx_descriptor *axi_dmac_prep_dma_cyclic( { struct axi_dmac_chan *chan = to_axi_dmac_chan(c); struct axi_dmac_desc *desc; - unsigned int num_periods, num_segments; + unsigned int num_periods, num_segments, num_sgs; if (direction != chan->direction) return NULL; @@ 
-597,11 +682,16 @@ static struct dma_async_tx_descriptor *axi_dmac_prep_dma_cyclic( num_periods = buf_len / period_len; num_segments = DIV_ROUND_UP(period_len, chan->max_length); + num_sgs = num_periods * num_segments; - desc = axi_dmac_alloc_desc(num_periods * num_segments); + desc = axi_dmac_alloc_desc(chan, num_sgs); if (!desc) return NULL; + /* Chain the last descriptor to the first, and remove its "last" flag */ + desc->sg[num_sgs - 1].hw->next_sg_addr = desc->sg[0].hw_phys; + desc->sg[num_sgs - 1].hw->flags &= ~AXI_DMAC_HW_FLAG_LAST; + axi_dmac_fill_linear_sg(chan, direction, buf_addr, num_periods, period_len, desc->sg); @@ -653,26 +743,26 @@ static struct dma_async_tx_descriptor *axi_dmac_prep_interleaved( return NULL; } - desc = axi_dmac_alloc_desc(1); + desc = axi_dmac_alloc_desc(chan, 1); if (!desc) return NULL; if (axi_dmac_src_is_mem(chan)) { - desc->sg[0].src_addr = xt->src_start; - desc->sg[0].src_stride = xt->sgl[0].size + src_icg; + desc->sg[0].hw->src_addr = xt->src_start; + desc->sg[0].hw->src_stride = xt->sgl[0].size + src_icg; } if (axi_dmac_dest_is_mem(chan)) { - desc->sg[0].dest_addr = xt->dst_start; - desc->sg[0].dest_stride = xt->sgl[0].size + dst_icg; + desc->sg[0].hw->dest_addr = xt->dst_start; + desc->sg[0].hw->dst_stride = xt->sgl[0].size + dst_icg; } if (chan->hw_2d) { - desc->sg[0].x_len = xt->sgl[0].size; - desc->sg[0].y_len = xt->numf; + desc->sg[0].hw->x_len = xt->sgl[0].size - 1; + desc->sg[0].hw->y_len = xt->numf - 1; } else { - desc->sg[0].x_len = xt->sgl[0].size * xt->numf; - desc->sg[0].y_len = 1; + desc->sg[0].hw->x_len = xt->sgl[0].size * xt->numf - 1; + desc->sg[0].hw->y_len = 0; } if (flags & DMA_CYCLIC) @@ -688,7 +778,7 @@ static void axi_dmac_free_chan_resources(struct dma_chan *c) static void axi_dmac_desc_free(struct virt_dma_desc *vdesc) { - kfree(container_of(vdesc, struct axi_dmac_desc, vdesc)); + axi_dmac_free_desc(to_axi_dmac_desc(vdesc)); } static bool axi_dmac_regmap_rdwr(struct device *dev, unsigned int reg) @@ 
-714,6 +804,9 @@ static bool axi_dmac_regmap_rdwr(struct device *dev, unsigned int reg) case AXI_DMAC_REG_CURRENT_DEST_ADDR: case AXI_DMAC_REG_PARTIAL_XFER_LEN: case AXI_DMAC_REG_PARTIAL_XFER_ID: + case AXI_DMAC_REG_CURRENT_SG_ID: + case AXI_DMAC_REG_SG_ADDRESS: + case AXI_DMAC_REG_SG_ADDRESS_HIGH: return true; default: return false; @@ -866,6 +959,10 @@ static int axi_dmac_detect_caps(struct axi_dmac *dmac, unsigned int version) if (axi_dmac_read(dmac, AXI_DMAC_REG_FLAGS) == AXI_DMAC_FLAG_CYCLIC) chan->hw_cyclic = true; + axi_dmac_write(dmac, AXI_DMAC_REG_SG_ADDRESS, 0xffffffff); + if (axi_dmac_read(dmac, AXI_DMAC_REG_SG_ADDRESS)) + chan->hw_sg = true; + axi_dmac_write(dmac, AXI_DMAC_REG_Y_LENGTH, 1); if (axi_dmac_read(dmac, AXI_DMAC_REG_Y_LENGTH) == 1) chan->hw_2d = true; @@ -911,6 +1008,7 @@ static int axi_dmac_probe(struct platform_device *pdev) struct axi_dmac *dmac; struct regmap *regmap; unsigned int version; + u32 irq_mask = 0; int ret; dmac = devm_kzalloc(&pdev->dev, sizeof(*dmac), GFP_KERNEL); @@ -966,6 +1064,7 @@ static int axi_dmac_probe(struct platform_device *pdev) dma_dev->dst_addr_widths = BIT(dmac->chan.dest_width); dma_dev->directions = BIT(dmac->chan.direction); dma_dev->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR; + dma_dev->max_sg_burst = 31; /* 31 SGs maximum in one burst */ INIT_LIST_HEAD(&dma_dev->channels); dmac->chan.vchan.desc_free = axi_dmac_desc_free; @@ -977,7 +1076,10 @@ static int axi_dmac_probe(struct platform_device *pdev) dma_dev->copy_align = (dmac->chan.address_align_mask + 1); - axi_dmac_write(dmac, AXI_DMAC_REG_IRQ_MASK, 0x00); + if (dmac->chan.hw_sg) + irq_mask |= AXI_DMAC_IRQ_SOT; + + axi_dmac_write(dmac, AXI_DMAC_REG_IRQ_MASK, irq_mask); if (of_dma_is_coherent(pdev->dev.of_node)) { ret = axi_dmac_read(dmac, AXI_DMAC_REG_COHERENCY_DESC); diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index b7388ae62d7f1fde1b24118a38c1d454974b2bd9..491b222402216a4a7bee1627b3e559321af33f36 100644 --- 
a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -1103,6 +1103,9 @@ EXPORT_SYMBOL_GPL(dma_async_device_channel_register); static void __dma_async_device_channel_unregister(struct dma_device *device, struct dma_chan *chan) { + if (chan->local == NULL) + return; + WARN_ONCE(!device->device_release && chan->client_count, "%s called while %d clients hold a reference\n", __func__, chan->client_count); diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index ffe621695e472b6b6d96e2a06a85ff514ee65651..a4f6088378492d0338fe65edc86ec51c4a9c0a10 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -21,6 +21,10 @@ #include #include +static bool nobounce; +module_param(nobounce, bool, 0644); +MODULE_PARM_DESC(nobounce, "Prevent using swiotlb buffer (default: use swiotlb buffer)"); + static unsigned int test_buf_size = 16384; module_param(test_buf_size, uint, 0644); MODULE_PARM_DESC(test_buf_size, "Size of the memcpy test buffer"); @@ -90,6 +94,7 @@ MODULE_PARM_DESC(polled, "Use polling for completion instead of interrupts"); /** * struct dmatest_params - test parameters. 
+ * @nobounce: prevent using swiotlb buffer * @buf_size: size of the memcpy test buffer * @channel: bus ID of the channel to test * @device: bus ID of the DMA Engine to test @@ -106,6 +111,7 @@ MODULE_PARM_DESC(polled, "Use polling for completion instead of interrupts"); * @polled: use polling for completion instead of interrupts */ struct dmatest_params { + bool nobounce; unsigned int buf_size; char channel[20]; char device[32]; @@ -215,6 +221,7 @@ struct dmatest_done { struct dmatest_data { u8 **raw; u8 **aligned; + gfp_t gfp_flags; unsigned int cnt; unsigned int off; }; @@ -533,7 +540,7 @@ static int dmatest_alloc_test_data(struct dmatest_data *d, goto err; for (i = 0; i < d->cnt; i++) { - d->raw[i] = kmalloc(buf_size + align, GFP_KERNEL); + d->raw[i] = kmalloc(buf_size + align, d->gfp_flags); if (!d->raw[i]) goto err; @@ -655,6 +662,13 @@ static int dmatest_func(void *data) goto err_free_coefs; } + src->gfp_flags = GFP_KERNEL; + dst->gfp_flags = GFP_KERNEL; + if (params->nobounce) { + src->gfp_flags = GFP_DMA; + dst->gfp_flags = GFP_DMA; + } + if (dmatest_alloc_test_data(src, buf_size, align) < 0) goto err_free_coefs; @@ -1093,6 +1107,7 @@ static void add_threaded_test(struct dmatest_info *info) struct dmatest_params *params = &info->params; /* Copy test parameters */ + params->nobounce = nobounce; params->buf_size = test_buf_size; strscpy(params->channel, strim(test_channel), sizeof(params->channel)); strscpy(params->device, strim(test_device), sizeof(params->device)); diff --git a/drivers/dma/dw-edma/dw-edma-v0-debugfs.c b/drivers/dma/dw-edma/dw-edma-v0-debugfs.c index 0745d9e7d259b1294e519744ced86dcdecbd7d7d..406f169b09a75a52197c6615c675b1eb61022169 100644 --- a/drivers/dma/dw-edma/dw-edma-v0-debugfs.c +++ b/drivers/dma/dw-edma/dw-edma-v0-debugfs.c @@ -176,7 +176,7 @@ dw_edma_debugfs_regs_wr(struct dw_edma *dw, struct dentry *dent) }; struct dentry *regs_dent, *ch_dent; int nr_entries, i; - char name[16]; + char name[32]; regs_dent = 
debugfs_create_dir(WRITE_STR, dent); @@ -239,7 +239,7 @@ static noinline_for_stack void dw_edma_debugfs_regs_rd(struct dw_edma *dw, }; struct dentry *regs_dent, *ch_dent; int nr_entries, i; - char name[16]; + char name[32]; regs_dent = debugfs_create_dir(READ_STR, dent); diff --git a/drivers/dma/dw-edma/dw-hdma-v0-debugfs.c b/drivers/dma/dw-edma/dw-hdma-v0-debugfs.c index 520c81978b085fb244311d041faf1786ec7b5750..dcdc57fe976c134f7825425627e782f3ad5b96de 100644 --- a/drivers/dma/dw-edma/dw-hdma-v0-debugfs.c +++ b/drivers/dma/dw-edma/dw-hdma-v0-debugfs.c @@ -116,7 +116,7 @@ static void dw_hdma_debugfs_regs_ch(struct dw_edma *dw, enum dw_edma_dir dir, static void dw_hdma_debugfs_regs_wr(struct dw_edma *dw, struct dentry *dent) { struct dentry *regs_dent, *ch_dent; - char name[16]; + char name[32]; int i; regs_dent = debugfs_create_dir(WRITE_STR, dent); @@ -133,7 +133,7 @@ static void dw_hdma_debugfs_regs_wr(struct dw_edma *dw, struct dentry *dent) static void dw_hdma_debugfs_regs_rd(struct dw_edma *dw, struct dentry *dent) { struct dentry *regs_dent, *ch_dent; - char name[16]; + char name[32]; int i; regs_dent = debugfs_create_dir(READ_STR, dent); diff --git a/drivers/dma/fsl-edma-main.c b/drivers/dma/fsl-edma-main.c index 238a69bd0d6f5d3ba6d8329543c49d3a750dba21..45cc419b1b4acbe87c12c3daaccafce73f8de1ba 100644 --- a/drivers/dma/fsl-edma-main.c +++ b/drivers/dma/fsl-edma-main.c @@ -9,6 +9,7 @@ * Vybrid and Layerscape SoCs. 
*/ +#include #include #include #include @@ -21,10 +22,6 @@ #include "fsl-edma-common.h" -#define ARGS_RX BIT(0) -#define ARGS_REMOTE BIT(1) -#define ARGS_MULTI_FIFO BIT(2) - static void fsl_edma_synchronize(struct dma_chan *chan) { struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan); @@ -153,9 +150,15 @@ static struct dma_chan *fsl_edma3_xlate(struct of_phandle_args *dma_spec, i = fsl_chan - fsl_edma->chans; fsl_chan->priority = dma_spec->args[1]; - fsl_chan->is_rxchan = dma_spec->args[2] & ARGS_RX; - fsl_chan->is_remote = dma_spec->args[2] & ARGS_REMOTE; - fsl_chan->is_multi_fifo = dma_spec->args[2] & ARGS_MULTI_FIFO; + fsl_chan->is_rxchan = dma_spec->args[2] & FSL_EDMA_RX; + fsl_chan->is_remote = dma_spec->args[2] & FSL_EDMA_REMOTE; + fsl_chan->is_multi_fifo = dma_spec->args[2] & FSL_EDMA_MULTI_FIFO; + + if ((dma_spec->args[2] & FSL_EDMA_EVEN_CH) && (i & 0x1)) + continue; + + if ((dma_spec->args[2] & FSL_EDMA_ODD_CH) && !(i & 0x1)) + continue; if (!b_chmux && i == dma_spec->args[0]) { chan = dma_get_slave_channel(chan); diff --git a/drivers/dma/fsl-qdma.c b/drivers/dma/fsl-qdma.c index 47cb284680494cc842141c3c680a617a424d5669..a1d0aa63142a981bb59fcde5663e53ee7355947c 100644 --- a/drivers/dma/fsl-qdma.c +++ b/drivers/dma/fsl-qdma.c @@ -805,7 +805,7 @@ fsl_qdma_irq_init(struct platform_device *pdev, int i; int cpu; int ret; - char irq_name[20]; + char irq_name[32]; fsl_qdma->error_irq = platform_get_irq_byname(pdev, "qdma-error"); diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index 1d918d45d9f6d67453f8f662e08b6f430161f126..77f8885cf4075acfd3ff535b7e09519a8df41c70 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -165,7 +165,7 @@ static void idxd_cdev_dev_release(struct device *dev) struct idxd_wq *wq = idxd_cdev->wq; cdev_ctx = &ictx[wq->idxd->data->type]; - ida_simple_remove(&cdev_ctx->minor_ida, idxd_cdev->minor); + ida_free(&cdev_ctx->minor_ida, idxd_cdev->minor); kfree(idxd_cdev); } @@ -463,7 +463,7 @@ int 
idxd_wq_add_cdev(struct idxd_wq *wq) cdev = &idxd_cdev->cdev; dev = cdev_dev(idxd_cdev); cdev_ctx = &ictx[wq->idxd->data->type]; - minor = ida_simple_get(&cdev_ctx->minor_ida, 0, MINORMASK, GFP_KERNEL); + minor = ida_alloc_max(&cdev_ctx->minor_ida, MINORMASK, GFP_KERNEL); if (minor < 0) { kfree(idxd_cdev); return minor; diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index f43d81128b96b3672fab2ce2de0f54fea1660f2e..ecfdf4a8f1f838ea49f1dbe3f60b15574aa8be11 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -807,6 +807,9 @@ err_bmap: static void idxd_device_evl_free(struct idxd_device *idxd) { + void *evl_log; + unsigned int evl_log_size; + dma_addr_t evl_dma; union gencfg_reg gencfg; union genctrl_reg genctrl; struct device *dev = &idxd->pdev->dev; @@ -827,11 +830,15 @@ static void idxd_device_evl_free(struct idxd_device *idxd) iowrite64(0, idxd->reg_base + IDXD_EVLCFG_OFFSET); iowrite64(0, idxd->reg_base + IDXD_EVLCFG_OFFSET + 8); - dma_free_coherent(dev, evl->log_size, evl->log, evl->dma); bitmap_free(evl->bmap); + evl_log = evl->log; + evl_log_size = evl->log_size; + evl_dma = evl->dma; evl->log = NULL; evl->size = IDXD_EVL_SIZE_MIN; spin_unlock(&evl->lock); + + dma_free_coherent(dev, evl_log_size, evl_log, evl_dma); } static void idxd_group_config_write(struct idxd_group *group) diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index f81ecf5863e86ec00190f986e64f129cc8a3dac7..9b42f5e96b1e0a2b7d001fa406a7d98cd040a577 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -421,9 +421,7 @@ struct sdma_desc { * @shp_addr: value for gReg[6] * @per_addr: value for gReg[2] * @status: status of dma channel - * @context_loaded: ensure context is only loaded once * @data: specific sdma interface structure - * @bd_pool: dma_pool for bd * @terminate_worker: used to call back into terminate work function * @terminated: terminated list * @is_ram_script: flag for script in ram @@ -486,8 +484,6 @@ struct 
sdma_channel { * @num_script_addrs: Number of script addresses in this image * @ram_code_start: offset of SDMA ram image in this firmware image * @ram_code_size: size of SDMA ram image - * @script_addrs: Stores the start address of the SDMA scripts - * (in SDMA memory space) */ struct sdma_firmware_header { u32 magic; diff --git a/drivers/dma/ls2x-apb-dma.c b/drivers/dma/ls2x-apb-dma.c new file mode 100644 index 0000000000000000000000000000000000000000..a49913f3ed3f7c0b36de34533f75e7806eb2a8c0 --- /dev/null +++ b/drivers/dma/ls2x-apb-dma.c @@ -0,0 +1,705 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Driver for the Loongson LS2X APB DMA Controller + * + * Copyright (C) 2017-2023 Loongson Corporation + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "dmaengine.h" +#include "virt-dma.h" + +/* Global Configuration Register */ +#define LDMA_ORDER_ERG 0x0 + +/* Bitfield definitions */ + +/* Bitfields in Global Configuration Register */ +#define LDMA_64BIT_EN BIT(0) /* 1: 64 bit support */ +#define LDMA_UNCOHERENT_EN BIT(1) /* 0: cache, 1: uncache */ +#define LDMA_ASK_VALID BIT(2) +#define LDMA_START BIT(3) /* DMA start operation */ +#define LDMA_STOP BIT(4) /* DMA stop operation */ +#define LDMA_CONFIG_MASK GENMASK(4, 0) /* DMA controller config bits mask */ + +/* Bitfields in ndesc_addr field of HW decriptor */ +#define LDMA_DESC_EN BIT(0) /*1: The next descriptor is valid */ +#define LDMA_DESC_ADDR_LOW GENMASK(31, 1) + +/* Bitfields in cmd field of HW decriptor */ +#define LDMA_INT BIT(1) /* Enable DMA interrupts */ +#define LDMA_DATA_DIRECTION BIT(12) /* 1: write to device, 0: read from device */ + +#define LDMA_SLAVE_BUSWIDTHS (BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_8_BYTES)) + +#define LDMA_MAX_TRANS_LEN U32_MAX + +/*-- descriptors -----------------------------------------------------*/ + +/* + * struct ls2x_dma_hw_desc - DMA HW descriptor + * @ndesc_addr: 
the next descriptor low address. + * @mem_addr: memory low address. + * @apb_addr: device buffer address. + * @len: length of a piece of carried content, in words. + * @step_len: length between two moved memory data blocks. + * @step_times: number of blocks to be carried in a single DMA operation. + * @cmd: descriptor command or state. + * @stats: DMA status. + * @high_ndesc_addr: the next descriptor high address. + * @high_mem_addr: memory high address. + * @reserved: reserved + */ +struct ls2x_dma_hw_desc { + u32 ndesc_addr; + u32 mem_addr; + u32 apb_addr; + u32 len; + u32 step_len; + u32 step_times; + u32 cmd; + u32 stats; + u32 high_ndesc_addr; + u32 high_mem_addr; + u32 reserved[2]; +} __packed; + +/* + * struct ls2x_dma_sg - ls2x dma scatter gather entry + * @hw: the pointer to DMA HW descriptor. + * @llp: physical address of the DMA HW descriptor. + * @phys: destination or source address(mem). + * @len: number of Bytes to read. + */ +struct ls2x_dma_sg { + struct ls2x_dma_hw_desc *hw; + dma_addr_t llp; + dma_addr_t phys; + u32 len; +}; + +/* + * struct ls2x_dma_desc - software descriptor + * @vdesc: pointer to the virtual dma descriptor. + * @cyclic: flag to dma cyclic + * @burst_size: burst size of transaction, in words. + * @desc_num: number of sg entries. + * @direction: transfer direction, to or from device. + * @status: dma controller status. + * @sg: array of sgs. + */ +struct ls2x_dma_desc { + struct virt_dma_desc vdesc; + bool cyclic; + size_t burst_size; + u32 desc_num; + enum dma_transfer_direction direction; + enum dma_status status; + struct ls2x_dma_sg sg[] __counted_by(desc_num); +}; + +/*-- Channels --------------------------------------------------------*/ + +/* + * struct ls2x_dma_chan - internal representation of an LS2X APB DMA channel + * @vchan: virtual dma channel entry. + * @desc: pointer to the ls2x sw dma descriptor. 
+ * @pool: hw desc table + * @irq: irq line + * @sconfig: configuration for slave transfers, passed via .device_config + */ +struct ls2x_dma_chan { + struct virt_dma_chan vchan; + struct ls2x_dma_desc *desc; + void *pool; + int irq; + struct dma_slave_config sconfig; +}; + +/*-- Controller ------------------------------------------------------*/ + +/* + * struct ls2x_dma_priv - LS2X APB DMAC specific information + * @ddev: dmaengine dma_device object members + * @dma_clk: DMAC clock source + * @regs: memory mapped register base + * @lchan: channel to store ls2x_dma_chan structures + */ +struct ls2x_dma_priv { + struct dma_device ddev; + struct clk *dma_clk; + void __iomem *regs; + struct ls2x_dma_chan lchan; +}; + +/*-- Helper functions ------------------------------------------------*/ + +static inline struct ls2x_dma_desc *to_ldma_desc(struct virt_dma_desc *vdesc) +{ + return container_of(vdesc, struct ls2x_dma_desc, vdesc); +} + +static inline struct ls2x_dma_chan *to_ldma_chan(struct dma_chan *chan) +{ + return container_of(chan, struct ls2x_dma_chan, vchan.chan); +} + +static inline struct ls2x_dma_priv *to_ldma_priv(struct dma_device *ddev) +{ + return container_of(ddev, struct ls2x_dma_priv, ddev); +} + +static struct device *chan2dev(struct dma_chan *chan) +{ + return &chan->dev->device; +} + +static void ls2x_dma_desc_free(struct virt_dma_desc *vdesc) +{ + struct ls2x_dma_chan *lchan = to_ldma_chan(vdesc->tx.chan); + struct ls2x_dma_desc *desc = to_ldma_desc(vdesc); + int i; + + for (i = 0; i < desc->desc_num; i++) { + if (desc->sg[i].hw) + dma_pool_free(lchan->pool, desc->sg[i].hw, + desc->sg[i].llp); + } + + kfree(desc); +} + +static void ls2x_dma_write_cmd(struct ls2x_dma_chan *lchan, bool cmd) +{ + struct ls2x_dma_priv *priv = to_ldma_priv(lchan->vchan.chan.device); + u64 val; + + val = lo_hi_readq(priv->regs + LDMA_ORDER_ERG) & ~LDMA_CONFIG_MASK; + val |= LDMA_64BIT_EN | cmd; + lo_hi_writeq(val, priv->regs + LDMA_ORDER_ERG); +} + +static void 
ls2x_dma_start_transfer(struct ls2x_dma_chan *lchan) +{ + struct ls2x_dma_priv *priv = to_ldma_priv(lchan->vchan.chan.device); + struct ls2x_dma_sg *ldma_sg; + struct virt_dma_desc *vdesc; + u64 val; + + /* Get the next descriptor */ + vdesc = vchan_next_desc(&lchan->vchan); + if (!vdesc) { + lchan->desc = NULL; + return; + } + + list_del(&vdesc->node); + lchan->desc = to_ldma_desc(vdesc); + ldma_sg = &lchan->desc->sg[0]; + + /* Start DMA */ + lo_hi_writeq(0, priv->regs + LDMA_ORDER_ERG); + val = (ldma_sg->llp & ~LDMA_CONFIG_MASK) | LDMA_64BIT_EN | LDMA_START; + lo_hi_writeq(val, priv->regs + LDMA_ORDER_ERG); +} + +static size_t ls2x_dmac_detect_burst(struct ls2x_dma_chan *lchan) +{ + u32 maxburst, buswidth; + + /* Reject definitely invalid configurations */ + if ((lchan->sconfig.src_addr_width & LDMA_SLAVE_BUSWIDTHS) && + (lchan->sconfig.dst_addr_width & LDMA_SLAVE_BUSWIDTHS)) + return 0; + + if (lchan->sconfig.direction == DMA_MEM_TO_DEV) { + maxburst = lchan->sconfig.dst_maxburst; + buswidth = lchan->sconfig.dst_addr_width; + } else { + maxburst = lchan->sconfig.src_maxburst; + buswidth = lchan->sconfig.src_addr_width; + } + + /* If maxburst is zero, fallback to LDMA_MAX_TRANS_LEN */ + return maxburst ? 
(maxburst * buswidth) >> 2 : LDMA_MAX_TRANS_LEN; +} + +static void ls2x_dma_fill_desc(struct ls2x_dma_chan *lchan, u32 sg_index, + struct ls2x_dma_desc *desc) +{ + struct ls2x_dma_sg *ldma_sg = &desc->sg[sg_index]; + u32 num_segments, segment_size; + + if (desc->direction == DMA_MEM_TO_DEV) { + ldma_sg->hw->cmd = LDMA_INT | LDMA_DATA_DIRECTION; + ldma_sg->hw->apb_addr = lchan->sconfig.dst_addr; + } else { + ldma_sg->hw->cmd = LDMA_INT; + ldma_sg->hw->apb_addr = lchan->sconfig.src_addr; + } + + ldma_sg->hw->mem_addr = lower_32_bits(ldma_sg->phys); + ldma_sg->hw->high_mem_addr = upper_32_bits(ldma_sg->phys); + + /* Split into multiple equally sized segments if necessary */ + num_segments = DIV_ROUND_UP((ldma_sg->len + 3) >> 2, desc->burst_size); + segment_size = DIV_ROUND_UP((ldma_sg->len + 3) >> 2, num_segments); + + /* Word count register takes input in words */ + ldma_sg->hw->len = segment_size; + ldma_sg->hw->step_times = num_segments; + ldma_sg->hw->step_len = 0; + + /* lets make a link list */ + if (sg_index) { + desc->sg[sg_index - 1].hw->ndesc_addr = ldma_sg->llp | LDMA_DESC_EN; + desc->sg[sg_index - 1].hw->high_ndesc_addr = upper_32_bits(ldma_sg->llp); + } +} + +/*-- DMA Engine API --------------------------------------------------*/ + +/* + * ls2x_dma_alloc_chan_resources - allocate resources for DMA channel + * @chan: allocate descriptor resources for this channel + * + * return - the number of allocated descriptors + */ +static int ls2x_dma_alloc_chan_resources(struct dma_chan *chan) +{ + struct ls2x_dma_chan *lchan = to_ldma_chan(chan); + + /* Create a pool of consistent memory blocks for hardware descriptors */ + lchan->pool = dma_pool_create(dev_name(chan2dev(chan)), + chan->device->dev, PAGE_SIZE, + __alignof__(struct ls2x_dma_hw_desc), 0); + if (!lchan->pool) { + dev_err(chan2dev(chan), "No memory for descriptors\n"); + return -ENOMEM; + } + + return 1; +} + +/* + * ls2x_dma_free_chan_resources - free all channel resources + * @chan: DMA channel + */ 
+static void ls2x_dma_free_chan_resources(struct dma_chan *chan) +{ + struct ls2x_dma_chan *lchan = to_ldma_chan(chan); + + vchan_free_chan_resources(to_virt_chan(chan)); + dma_pool_destroy(lchan->pool); + lchan->pool = NULL; +} + +/* + * ls2x_dma_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction + * @chan: DMA channel + * @sgl: scatterlist to transfer to/from + * @sg_len: number of entries in @scatterlist + * @direction: DMA direction + * @flags: tx descriptor status flags + * @context: transaction context (ignored) + * + * Return: Async transaction descriptor on success and NULL on failure + */ +static struct dma_async_tx_descriptor * +ls2x_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, + u32 sg_len, enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct ls2x_dma_chan *lchan = to_ldma_chan(chan); + struct ls2x_dma_desc *desc; + struct scatterlist *sg; + size_t burst_size; + int i; + + if (unlikely(!sg_len || !is_slave_direction(direction))) + return NULL; + + burst_size = ls2x_dmac_detect_burst(lchan); + if (!burst_size) + return NULL; + + desc = kzalloc(struct_size(desc, sg, sg_len), GFP_NOWAIT); + if (!desc) + return NULL; + + desc->desc_num = sg_len; + desc->direction = direction; + desc->burst_size = burst_size; + + for_each_sg(sgl, sg, sg_len, i) { + struct ls2x_dma_sg *ldma_sg = &desc->sg[i]; + + /* Allocate DMA capable memory for hardware descriptor */ + ldma_sg->hw = dma_pool_alloc(lchan->pool, GFP_NOWAIT, &ldma_sg->llp); + if (!ldma_sg->hw) { + desc->desc_num = i; + ls2x_dma_desc_free(&desc->vdesc); + return NULL; + } + + ldma_sg->phys = sg_dma_address(sg); + ldma_sg->len = sg_dma_len(sg); + + ls2x_dma_fill_desc(lchan, i, desc); + } + + /* Setting the last descriptor enable bit */ + desc->sg[sg_len - 1].hw->ndesc_addr &= ~LDMA_DESC_EN; + desc->status = DMA_IN_PROGRESS; + + return vchan_tx_prep(&lchan->vchan, &desc->vdesc, flags); +} + +/* + * ls2x_dma_prep_dma_cyclic - prepare the cyclic 
DMA transfer + * @chan: the DMA channel to prepare + * @buf_addr: physical DMA address where the buffer starts + * @buf_len: total number of bytes for the entire buffer + * @period_len: number of bytes for each period + * @direction: transfer direction, to or from device + * @flags: tx descriptor status flags + * + * Return: Async transaction descriptor on success and NULL on failure + */ +static struct dma_async_tx_descriptor * +ls2x_dma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, + size_t period_len, enum dma_transfer_direction direction, + unsigned long flags) +{ + struct ls2x_dma_chan *lchan = to_ldma_chan(chan); + struct ls2x_dma_desc *desc; + size_t burst_size; + u32 num_periods; + int i; + + if (unlikely(!buf_len || !period_len)) + return NULL; + + if (unlikely(!is_slave_direction(direction))) + return NULL; + + burst_size = ls2x_dmac_detect_burst(lchan); + if (!burst_size) + return NULL; + + num_periods = buf_len / period_len; + desc = kzalloc(struct_size(desc, sg, num_periods), GFP_NOWAIT); + if (!desc) + return NULL; + + desc->desc_num = num_periods; + desc->direction = direction; + desc->burst_size = burst_size; + + /* Build cyclic linked list */ + for (i = 0; i < num_periods; i++) { + struct ls2x_dma_sg *ldma_sg = &desc->sg[i]; + + /* Allocate DMA capable memory for hardware descriptor */ + ldma_sg->hw = dma_pool_alloc(lchan->pool, GFP_NOWAIT, &ldma_sg->llp); + if (!ldma_sg->hw) { + desc->desc_num = i; + ls2x_dma_desc_free(&desc->vdesc); + return NULL; + } + + ldma_sg->phys = buf_addr + period_len * i; + ldma_sg->len = period_len; + + ls2x_dma_fill_desc(lchan, i, desc); + } + + /* Lets make a cyclic list */ + desc->sg[num_periods - 1].hw->ndesc_addr = desc->sg[0].llp | LDMA_DESC_EN; + desc->sg[num_periods - 1].hw->high_ndesc_addr = upper_32_bits(desc->sg[0].llp); + desc->cyclic = true; + desc->status = DMA_IN_PROGRESS; + + return vchan_tx_prep(&lchan->vchan, &desc->vdesc, flags); +} + +/* + * ls2x_slave_config - set slave 
configuration for channel + * @chan: dma channel + * @cfg: slave configuration + * + * Sets slave configuration for channel + */ +static int ls2x_dma_slave_config(struct dma_chan *chan, + struct dma_slave_config *config) +{ + struct ls2x_dma_chan *lchan = to_ldma_chan(chan); + + memcpy(&lchan->sconfig, config, sizeof(*config)); + return 0; +} + +/* + * ls2x_dma_issue_pending - push pending transactions to the hardware + * @chan: channel + * + * When this function is called, all pending transactions are pushed to the + * hardware and executed. + */ +static void ls2x_dma_issue_pending(struct dma_chan *chan) +{ + struct ls2x_dma_chan *lchan = to_ldma_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&lchan->vchan.lock, flags); + if (vchan_issue_pending(&lchan->vchan) && !lchan->desc) + ls2x_dma_start_transfer(lchan); + spin_unlock_irqrestore(&lchan->vchan.lock, flags); +} + +/* + * ls2x_dma_terminate_all - terminate all transactions + * @chan: channel + * + * Stops all DMA transactions. + */ +static int ls2x_dma_terminate_all(struct dma_chan *chan) +{ + struct ls2x_dma_chan *lchan = to_ldma_chan(chan); + unsigned long flags; + LIST_HEAD(head); + + spin_lock_irqsave(&lchan->vchan.lock, flags); + /* Setting stop cmd */ + ls2x_dma_write_cmd(lchan, LDMA_STOP); + if (lchan->desc) { + vchan_terminate_vdesc(&lchan->desc->vdesc); + lchan->desc = NULL; + } + + vchan_get_all_descriptors(&lchan->vchan, &head); + spin_unlock_irqrestore(&lchan->vchan.lock, flags); + + vchan_dma_desc_free_list(&lchan->vchan, &head); + return 0; +} + +/* + * ls2x_dma_synchronize - Synchronizes the termination of transfers to the + * current context. 
+ * @chan: channel + */ +static void ls2x_dma_synchronize(struct dma_chan *chan) +{ + struct ls2x_dma_chan *lchan = to_ldma_chan(chan); + + vchan_synchronize(&lchan->vchan); +} + +static int ls2x_dma_pause(struct dma_chan *chan) +{ + struct ls2x_dma_chan *lchan = to_ldma_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&lchan->vchan.lock, flags); + if (lchan->desc && lchan->desc->status == DMA_IN_PROGRESS) { + ls2x_dma_write_cmd(lchan, LDMA_STOP); + lchan->desc->status = DMA_PAUSED; + } + spin_unlock_irqrestore(&lchan->vchan.lock, flags); + + return 0; +} + +static int ls2x_dma_resume(struct dma_chan *chan) +{ + struct ls2x_dma_chan *lchan = to_ldma_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&lchan->vchan.lock, flags); + if (lchan->desc && lchan->desc->status == DMA_PAUSED) { + lchan->desc->status = DMA_IN_PROGRESS; + ls2x_dma_write_cmd(lchan, LDMA_START); + } + spin_unlock_irqrestore(&lchan->vchan.lock, flags); + + return 0; +} + +/* + * ls2x_dma_isr - LS2X DMA Interrupt handler + * @irq: IRQ number + * @dev_id: Pointer to ls2x_dma_chan + * + * Return: IRQ_HANDLED/IRQ_NONE + */ +static irqreturn_t ls2x_dma_isr(int irq, void *dev_id) +{ + struct ls2x_dma_chan *lchan = dev_id; + struct ls2x_dma_desc *desc; + + spin_lock(&lchan->vchan.lock); + desc = lchan->desc; + if (desc) { + if (desc->cyclic) { + vchan_cyclic_callback(&desc->vdesc); + } else { + desc->status = DMA_COMPLETE; + vchan_cookie_complete(&desc->vdesc); + ls2x_dma_start_transfer(lchan); + } + + /* ls2x_dma_start_transfer() updates lchan->desc */ + if (!lchan->desc) + ls2x_dma_write_cmd(lchan, LDMA_STOP); + } + spin_unlock(&lchan->vchan.lock); + + return IRQ_HANDLED; +} + +static int ls2x_dma_chan_init(struct platform_device *pdev, + struct ls2x_dma_priv *priv) +{ + struct ls2x_dma_chan *lchan = &priv->lchan; + struct device *dev = &pdev->dev; + int ret; + + lchan->irq = platform_get_irq(pdev, 0); + if (lchan->irq < 0) + return lchan->irq; + + ret = devm_request_irq(dev, lchan->irq, 
ls2x_dma_isr, IRQF_TRIGGER_RISING, + dev_name(&pdev->dev), lchan); + if (ret) + return ret; + + /* Initialize channels related values */ + INIT_LIST_HEAD(&priv->ddev.channels); + lchan->vchan.desc_free = ls2x_dma_desc_free; + vchan_init(&lchan->vchan, &priv->ddev); + + return 0; +} + +/* + * ls2x_dma_probe - Driver probe function + * @pdev: Pointer to the platform_device structure + * + * Return: '0' on success and failure value on error + */ +static int ls2x_dma_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct ls2x_dma_priv *priv; + struct dma_device *ddev; + int ret; + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->regs = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(priv->regs)) + return dev_err_probe(dev, PTR_ERR(priv->regs), + "devm_platform_ioremap_resource failed.\n"); + + priv->dma_clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(priv->dma_clk)) + return dev_err_probe(dev, PTR_ERR(priv->dma_clk), "devm_clk_get failed.\n"); + + ret = clk_prepare_enable(priv->dma_clk); + if (ret) + return dev_err_probe(dev, ret, "clk_prepare_enable failed.\n"); + + ret = ls2x_dma_chan_init(pdev, priv); + if (ret) + goto disable_clk; + + ddev = &priv->ddev; + ddev->dev = dev; + dma_cap_zero(ddev->cap_mask); + dma_cap_set(DMA_SLAVE, ddev->cap_mask); + dma_cap_set(DMA_CYCLIC, ddev->cap_mask); + + ddev->device_alloc_chan_resources = ls2x_dma_alloc_chan_resources; + ddev->device_free_chan_resources = ls2x_dma_free_chan_resources; + ddev->device_tx_status = dma_cookie_status; + ddev->device_issue_pending = ls2x_dma_issue_pending; + ddev->device_prep_slave_sg = ls2x_dma_prep_slave_sg; + ddev->device_prep_dma_cyclic = ls2x_dma_prep_dma_cyclic; + ddev->device_config = ls2x_dma_slave_config; + ddev->device_terminate_all = ls2x_dma_terminate_all; + ddev->device_synchronize = ls2x_dma_synchronize; + ddev->device_pause = ls2x_dma_pause; + ddev->device_resume = ls2x_dma_resume; + + 
ddev->src_addr_widths = LDMA_SLAVE_BUSWIDTHS; + ddev->dst_addr_widths = LDMA_SLAVE_BUSWIDTHS; + ddev->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); + + ret = dma_async_device_register(&priv->ddev); + if (ret < 0) + goto disable_clk; + + ret = of_dma_controller_register(dev->of_node, of_dma_xlate_by_chan_id, priv); + if (ret < 0) + goto unregister_dmac; + + platform_set_drvdata(pdev, priv); + + dev_info(dev, "Loongson LS2X APB DMA driver registered successfully.\n"); + return 0; + +unregister_dmac: + dma_async_device_unregister(&priv->ddev); +disable_clk: + clk_disable_unprepare(priv->dma_clk); + + return ret; +} + +/* + * ls2x_dma_remove - Driver remove function + * @pdev: Pointer to the platform_device structure + */ +static void ls2x_dma_remove(struct platform_device *pdev) +{ + struct ls2x_dma_priv *priv = platform_get_drvdata(pdev); + + of_dma_controller_free(pdev->dev.of_node); + dma_async_device_unregister(&priv->ddev); + clk_disable_unprepare(priv->dma_clk); +} + +static const struct of_device_id ls2x_dma_of_match_table[] = { + { .compatible = "loongson,ls2k1000-apbdma" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, ls2x_dma_of_match_table); + +static struct platform_driver ls2x_dmac_driver = { + .probe = ls2x_dma_probe, + .remove_new = ls2x_dma_remove, + .driver = { + .name = "ls2x-apbdma", + .of_match_table = ls2x_dma_of_match_table, + }, +}; +module_platform_driver(ls2x_dmac_driver); + +MODULE_DESCRIPTION("Loongson LS2X APB DMA Controller driver"); +MODULE_AUTHOR("Loongson Technology Corporation Limited"); +MODULE_LICENSE("GPL"); diff --git a/drivers/dma/milbeaut-hdmac.c b/drivers/dma/milbeaut-hdmac.c index 1b0a95892627d6dc31439b9112f1c76d9b86071d..7b41c670970a655267f470a945178c53c04602e9 100644 --- a/drivers/dma/milbeaut-hdmac.c +++ b/drivers/dma/milbeaut-hdmac.c @@ -531,7 +531,7 @@ disable_clk: return ret; } -static int milbeaut_hdmac_remove(struct platform_device *pdev) +static void milbeaut_hdmac_remove(struct platform_device *pdev) { 
struct milbeaut_hdmac_device *mdev = platform_get_drvdata(pdev); struct dma_chan *chan; @@ -546,16 +546,21 @@ static int milbeaut_hdmac_remove(struct platform_device *pdev) */ list_for_each_entry(chan, &mdev->ddev.channels, device_node) { ret = dmaengine_terminate_sync(chan); - if (ret) - return ret; + if (ret) { + /* + * This results in resource leakage and maybe also + * use-after-free errors as e.g. *mdev is kfreed. + */ + dev_alert(&pdev->dev, "Failed to terminate channel %d (%pe)\n", + chan->chan_id, ERR_PTR(ret)); + return; + } milbeaut_hdmac_free_chan_resources(chan); } of_dma_controller_free(pdev->dev.of_node); dma_async_device_unregister(&mdev->ddev); clk_disable_unprepare(mdev->clk); - - return 0; } static const struct of_device_id milbeaut_hdmac_match[] = { @@ -566,7 +571,7 @@ MODULE_DEVICE_TABLE(of, milbeaut_hdmac_match); static struct platform_driver milbeaut_hdmac_driver = { .probe = milbeaut_hdmac_probe, - .remove = milbeaut_hdmac_remove, + .remove_new = milbeaut_hdmac_remove, .driver = { .name = "milbeaut-m10v-hdmac", .of_match_table = milbeaut_hdmac_match, diff --git a/drivers/dma/milbeaut-xdmac.c b/drivers/dma/milbeaut-xdmac.c index d29d01e730aa09171eecc60cfd298943b9783c9a..2cce529b448eb732f90dbeee236ac92a8d599ced 100644 --- a/drivers/dma/milbeaut-xdmac.c +++ b/drivers/dma/milbeaut-xdmac.c @@ -368,7 +368,7 @@ disable_xdmac: return ret; } -static int milbeaut_xdmac_remove(struct platform_device *pdev) +static void milbeaut_xdmac_remove(struct platform_device *pdev) { struct milbeaut_xdmac_device *mdev = platform_get_drvdata(pdev); struct dma_chan *chan; @@ -383,8 +383,15 @@ static int milbeaut_xdmac_remove(struct platform_device *pdev) */ list_for_each_entry(chan, &mdev->ddev.channels, device_node) { ret = dmaengine_terminate_sync(chan); - if (ret) - return ret; + if (ret) { + /* + * This results in resource leakage and maybe also + * use-after-free errors as e.g. *mdev is kfreed. 
+ */ + dev_alert(&pdev->dev, "Failed to terminate channel %d (%pe)\n", + chan->chan_id, ERR_PTR(ret)); + return; + } milbeaut_xdmac_free_chan_resources(chan); } @@ -392,8 +399,6 @@ static int milbeaut_xdmac_remove(struct platform_device *pdev) dma_async_device_unregister(&mdev->ddev); disable_xdmac(mdev); - - return 0; } static const struct of_device_id milbeaut_xdmac_match[] = { @@ -404,7 +409,7 @@ MODULE_DEVICE_TABLE(of, milbeaut_xdmac_match); static struct platform_driver milbeaut_xdmac_driver = { .probe = milbeaut_xdmac_probe, - .remove = milbeaut_xdmac_remove, + .remove_new = milbeaut_xdmac_remove, .driver = { .name = "milbeaut-m10v-xdmac", .of_match_table = milbeaut_xdmac_match, diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index 3cf0b38387ae5604adf7fbf07574444171c21043..c29744bfdf2c2afc4ae6a7b6fdcd963a19db2977 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -1053,6 +1053,9 @@ static bool _trigger(struct pl330_thread *thrd) thrd->req_running = idx; + if (desc->rqtype == DMA_MEM_TO_DEV || desc->rqtype == DMA_DEV_TO_MEM) + UNTIL(thrd, PL330_STATE_WFP); + return true; } diff --git a/drivers/dma/sf-pdma/sf-pdma.c b/drivers/dma/sf-pdma/sf-pdma.c index 3125a2f162b4788d3ffbf182265bff18532ba19c..428473611115d1007755f244a51ab52eeefe46a5 100644 --- a/drivers/dma/sf-pdma/sf-pdma.c +++ b/drivers/dma/sf-pdma/sf-pdma.c @@ -20,10 +20,13 @@ #include #include #include +#include #include #include "sf-pdma.h" +#define PDMA_QUIRK_NO_STRICT_ORDERING BIT(0) + #ifndef readq static inline unsigned long long readq(void __iomem *addr) { @@ -65,7 +68,7 @@ static struct sf_pdma_desc *sf_pdma_alloc_desc(struct sf_pdma_chan *chan) static void sf_pdma_fill_desc(struct sf_pdma_desc *desc, u64 dst, u64 src, u64 size) { - desc->xfer_type = PDMA_FULL_SPEED; + desc->xfer_type = desc->chan->pdma->transfer_type; desc->xfer_size = size; desc->dst_addr = dst; desc->src_addr = src; @@ -492,6 +495,7 @@ static void sf_pdma_setup_chans(struct sf_pdma *pdma) static int 
sf_pdma_probe(struct platform_device *pdev) { + const struct sf_pdma_driver_platdata *ddata; struct sf_pdma *pdma; int ret, n_chans; const enum dma_slave_buswidth widths = @@ -517,6 +521,14 @@ static int sf_pdma_probe(struct platform_device *pdev) pdma->n_chans = n_chans; + pdma->transfer_type = PDMA_FULL_SPEED | PDMA_STRICT_ORDERING; + + ddata = device_get_match_data(&pdev->dev); + if (ddata) { + if (ddata->quirks & PDMA_QUIRK_NO_STRICT_ORDERING) + pdma->transfer_type &= ~PDMA_STRICT_ORDERING; + } + pdma->membase = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(pdma->membase)) return PTR_ERR(pdma->membase); @@ -563,7 +575,20 @@ static int sf_pdma_probe(struct platform_device *pdev) return ret; } + ret = of_dma_controller_register(pdev->dev.of_node, + of_dma_xlate_by_chan_id, pdma); + if (ret < 0) { + dev_err(&pdev->dev, + "Can't register SiFive Platform OF_DMA. (%d)\n", ret); + goto err_unregister; + } + return 0; + +err_unregister: + dma_async_device_unregister(&pdma->dma_dev); + + return ret; } static void sf_pdma_remove(struct platform_device *pdev) @@ -583,12 +608,25 @@ static void sf_pdma_remove(struct platform_device *pdev) tasklet_kill(&ch->err_tasklet); } + if (pdev->dev.of_node) + of_dma_controller_free(pdev->dev.of_node); + dma_async_device_unregister(&pdma->dma_dev); } +static const struct sf_pdma_driver_platdata mpfs_pdma = { + .quirks = PDMA_QUIRK_NO_STRICT_ORDERING, +}; + static const struct of_device_id sf_pdma_dt_ids[] = { - { .compatible = "sifive,fu540-c000-pdma" }, - { .compatible = "sifive,pdma0" }, + { + .compatible = "sifive,fu540-c000-pdma", + }, { + .compatible = "sifive,pdma0", + }, { + .compatible = "microchip,mpfs-pdma", + .data = &mpfs_pdma, + }, {}, }; MODULE_DEVICE_TABLE(of, sf_pdma_dt_ids); diff --git a/drivers/dma/sf-pdma/sf-pdma.h b/drivers/dma/sf-pdma/sf-pdma.h index d05772b5d8d3fd3e0bee8d984eb41916c80e5214..215e07183d7e26b71f4238ab70e14c50bf72b5a2 100644 --- a/drivers/dma/sf-pdma/sf-pdma.h +++ b/drivers/dma/sf-pdma/sf-pdma.h 
@@ -48,7 +48,8 @@ #define PDMA_ERR_STATUS_MASK GENMASK(31, 31) /* Transfer Type */ -#define PDMA_FULL_SPEED 0xFF000008 +#define PDMA_FULL_SPEED 0xFF000000 +#define PDMA_STRICT_ORDERING BIT(3) /* Error Recovery */ #define MAX_RETRY 1 @@ -112,8 +113,13 @@ struct sf_pdma { struct dma_device dma_dev; void __iomem *membase; void __iomem *mappedbase; + u32 transfer_type; u32 n_chans; struct sf_pdma_chan chans[] __counted_by(n_chans); }; +struct sf_pdma_driver_platdata { + u32 quirks; +}; + #endif /* _SF_PDMA_H */ diff --git a/drivers/dma/sh/rz-dmac.c b/drivers/dma/sh/rz-dmac.c index fea5bda34bc20f679b987e0a2d4ffde4f28e1f7c..1f1e86ba5c66aa8c33556eabb34a94ba431e4cc8 100644 --- a/drivers/dma/sh/rz-dmac.c +++ b/drivers/dma/sh/rz-dmac.c @@ -755,11 +755,11 @@ static struct dma_chan *rz_dmac_of_xlate(struct of_phandle_args *dma_spec, static int rz_dmac_chan_probe(struct rz_dmac *dmac, struct rz_dmac_chan *channel, - unsigned int index) + u8 index) { struct platform_device *pdev = to_platform_device(dmac->dev); struct rz_lmdesc *lmdesc; - char pdev_irqname[5]; + char pdev_irqname[6]; char *irqname; int ret; @@ -767,7 +767,7 @@ static int rz_dmac_chan_probe(struct rz_dmac *dmac, channel->mid_rid = -EINVAL; /* Request the channel interrupt. 
*/ - sprintf(pdev_irqname, "ch%u", index); + scnprintf(pdev_irqname, sizeof(pdev_irqname), "ch%u", index); channel->irq = platform_get_irq_byname(pdev, pdev_irqname); if (channel->irq < 0) return channel->irq; @@ -845,9 +845,9 @@ static int rz_dmac_probe(struct platform_device *pdev) struct dma_device *engine; struct rz_dmac *dmac; int channel_num; - unsigned int i; int ret; int irq; + u8 i; dmac = devm_kzalloc(&pdev->dev, sizeof(*dmac), GFP_KERNEL); if (!dmac) diff --git a/drivers/dma/sh/shdma.h b/drivers/dma/sh/shdma.h index 9c121a4b33ad829c77ae88c094fc80d942b9d709..f97d80343aea42fd399e93da3492e07a8fa86b35 100644 --- a/drivers/dma/sh/shdma.h +++ b/drivers/dma/sh/shdma.h @@ -25,7 +25,7 @@ struct sh_dmae_chan { const struct sh_dmae_slave_config *config; /* Slave DMA configuration */ int xmit_shift; /* log_2(bytes_per_xfer) */ void __iomem *base; - char dev_id[16]; /* unique name per DMAC of channel */ + char dev_id[32]; /* unique name per DMAC of channel */ int pm_error; dma_addr_t slave_addr; }; diff --git a/drivers/dma/sh/usb-dmac.c b/drivers/dma/sh/usb-dmac.c index a9b4302f6050144f2359927c4cd4cf41dc38e80f..f7cd0cad056c16ced372483d64b5d1032696dea4 100644 --- a/drivers/dma/sh/usb-dmac.c +++ b/drivers/dma/sh/usb-dmac.c @@ -706,10 +706,10 @@ static const struct dev_pm_ops usb_dmac_pm = { static int usb_dmac_chan_probe(struct usb_dmac *dmac, struct usb_dmac_chan *uchan, - unsigned int index) + u8 index) { struct platform_device *pdev = to_platform_device(dmac->dev); - char pdev_irqname[5]; + char pdev_irqname[6]; char *irqname; int ret; @@ -717,7 +717,7 @@ static int usb_dmac_chan_probe(struct usb_dmac *dmac, uchan->iomem = dmac->iomem + USB_DMAC_CHAN_OFFSET(index); /* Request the channel interrupt. 
*/ - sprintf(pdev_irqname, "ch%u", index); + scnprintf(pdev_irqname, sizeof(pdev_irqname), "ch%u", index); uchan->irq = platform_get_irq_byname(pdev, pdev_irqname); if (uchan->irq < 0) return -ENODEV; @@ -768,8 +768,8 @@ static int usb_dmac_probe(struct platform_device *pdev) const enum dma_slave_buswidth widths = USB_DMAC_SLAVE_BUSWIDTH; struct dma_device *engine; struct usb_dmac *dmac; - unsigned int i; int ret; + u8 i; dmac = devm_kzalloc(&pdev->dev, sizeof(*dmac), GFP_KERNEL); if (!dmac) @@ -869,7 +869,7 @@ static void usb_dmac_chan_remove(struct usb_dmac *dmac, static void usb_dmac_remove(struct platform_device *pdev) { struct usb_dmac *dmac = platform_get_drvdata(pdev); - int i; + u8 i; for (i = 0; i < dmac->n_channels; ++i) usb_dmac_chan_remove(dmac, &dmac->channels[i]); diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 002833fb1fa04cabddb058fa6c575d7ffc42247e..2c489299148eeea268e83e1a74824c434d86cdee 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -31,13 +31,11 @@ /** * struct stedma40_platform_data - Configuration struct for the dma device. * - * @dev_tx: mapping between destination event line and io address - * @dev_rx: mapping between source event line and io address * @disabled_channels: A vector, ending with -1, that marks physical channels * that are for different reasons not available for the driver. * @soft_lli_chans: A vector, that marks physical channels will use LLI by SW * which avoids HW bug that exists in some versions of the controller. - * SoftLLI introduces relink overhead that could impact performace for + * SoftLLI introduces relink overhead that could impact performance for * certain use cases. * @num_of_soft_lli_chans: The number of channels that needs to be configured * to use SoftLLI. 
@@ -184,7 +182,7 @@ static __maybe_unused u32 d40_backup_regs[] = { /* * since 9540 and 8540 has the same HW revision - * use v4a for 9540 or ealier + * use v4a for 9540 or earlier * use v4b for 8540 or later * HW revision: * DB8500ed has revision 0 @@ -411,7 +409,7 @@ struct d40_desc { * * @base: The virtual address of LCLA. 18 bit aligned. * @dma_addr: DMA address, if mapped - * @base_unaligned: The orignal kmalloc pointer, if kmalloc is used. + * @base_unaligned: The original kmalloc pointer, if kmalloc is used. * This pointer is only there for clean-up on error. * @pages: The number of pages needed for all physical channels. * Only used later for clean-up on error @@ -1655,7 +1653,7 @@ static void dma_tasklet(struct tasklet_struct *t) return; check_pending_tx: - /* Rescue manouver if receiving double interrupts */ + /* Rescue maneuver if receiving double interrupts */ if (d40c->pending_tx > 0) d40c->pending_tx--; spin_unlock_irqrestore(&d40c->lock, flags); @@ -3412,7 +3410,7 @@ static int __init d40_lcla_allocate(struct d40_base *base) base->lcla_pool.base = (void *)page_list[i]; } else { /* - * After many attempts and no succees with finding the correct + * After many attempts and no success with finding the correct * alignment, try with allocating a big buffer. 
*/ dev_warn(base->dev, diff --git a/drivers/dma/tegra186-gpc-dma.c b/drivers/dma/tegra186-gpc-dma.c index fa4d4142a68a2183744a46f8d36da5e51fe2b18c..88547a23825b18aece9f4eb00221549e02eaadd1 100644 --- a/drivers/dma/tegra186-gpc-dma.c +++ b/drivers/dma/tegra186-gpc-dma.c @@ -1348,8 +1348,8 @@ static int tegra_dma_program_sid(struct tegra_dma_channel *tdc, int stream_id) static int tegra_dma_probe(struct platform_device *pdev) { const struct tegra_dma_chip_data *cdata = NULL; - struct iommu_fwspec *iommu_spec; - unsigned int stream_id, i; + unsigned int i; + u32 stream_id; struct tegra_dma *tdma; int ret; @@ -1378,12 +1378,10 @@ static int tegra_dma_probe(struct platform_device *pdev) tdma->dma_dev.dev = &pdev->dev; - iommu_spec = dev_iommu_fwspec_get(&pdev->dev); - if (!iommu_spec) { + if (!tegra_dev_iommu_get_stream_id(&pdev->dev, &stream_id)) { dev_err(&pdev->dev, "Missing iommu stream-id\n"); return -EINVAL; } - stream_id = iommu_spec->ids[0] & 0xffff; ret = device_property_read_u32(&pdev->dev, "dma-channel-mask", &tdma->chan_mask); diff --git a/drivers/dma/tegra210-adma.c b/drivers/dma/tegra210-adma.c index 7a0586633bf32624b442cf5569bacf9361408b78..24ad7077c53ba8f87b7dfb53b1d922a4bec411d2 100644 --- a/drivers/dma/tegra210-adma.c +++ b/drivers/dma/tegra210-adma.c @@ -153,6 +153,7 @@ struct tegra_adma { void __iomem *base_addr; struct clk *ahub_clk; unsigned int nr_channels; + unsigned long *dma_chan_mask; unsigned long rx_requests_reserved; unsigned long tx_requests_reserved; @@ -741,6 +742,10 @@ static int __maybe_unused tegra_adma_runtime_suspend(struct device *dev) for (i = 0; i < tdma->nr_channels; i++) { tdc = &tdma->channels[i]; + /* skip for reserved channels */ + if (!tdc->tdma) + continue; + ch_reg = &tdc->ch_regs; ch_reg->cmd = tdma_ch_read(tdc, ADMA_CH_CMD); /* skip if channel is not active */ @@ -779,6 +784,9 @@ static int __maybe_unused tegra_adma_runtime_resume(struct device *dev) for (i = 0; i < tdma->nr_channels; i++) { tdc = &tdma->channels[i]; + 
/* skip for reserved channels */ + if (!tdc->tdma) + continue; ch_reg = &tdc->ch_regs; /* skip if channel was not active earlier */ if (!ch_reg->cmd) @@ -867,10 +875,31 @@ static int tegra_adma_probe(struct platform_device *pdev) return PTR_ERR(tdma->ahub_clk); } + tdma->dma_chan_mask = devm_kzalloc(&pdev->dev, + BITS_TO_LONGS(tdma->nr_channels) * sizeof(unsigned long), + GFP_KERNEL); + if (!tdma->dma_chan_mask) + return -ENOMEM; + + /* Enable all channels by default */ + bitmap_fill(tdma->dma_chan_mask, tdma->nr_channels); + + ret = of_property_read_u32_array(pdev->dev.of_node, "dma-channel-mask", + (u32 *)tdma->dma_chan_mask, + BITS_TO_U32(tdma->nr_channels)); + if (ret < 0 && (ret != -EINVAL)) { + dev_err(&pdev->dev, "dma-channel-mask is not complete.\n"); + return ret; + } + INIT_LIST_HEAD(&tdma->dma_dev.channels); for (i = 0; i < tdma->nr_channels; i++) { struct tegra_adma_chan *tdc = &tdma->channels[i]; + /* skip for reserved channels */ + if (!test_bit(i, tdma->dma_chan_mask)) + continue; + tdc->chan_addr = tdma->base_addr + cdata->ch_base_offset + (cdata->ch_reg_size * i); @@ -957,8 +986,10 @@ static void tegra_adma_remove(struct platform_device *pdev) of_dma_controller_free(pdev->dev.of_node); dma_async_device_unregister(&tdma->dma_dev); - for (i = 0; i < tdma->nr_channels; ++i) - irq_dispose_mapping(tdma->channels[i].irq); + for (i = 0; i < tdma->nr_channels; ++i) { + if (tdma->channels[i].irq) + irq_dispose_mapping(tdma->channels[i].irq); + } pm_runtime_disable(&pdev->dev); } diff --git a/drivers/dma/ti/Makefile b/drivers/dma/ti/Makefile index acc950bf609c36de7c6382851631ed441abc3af3..d376c117cecf60d0d314cde97f73cb5d3532a013 100644 --- a/drivers/dma/ti/Makefile +++ b/drivers/dma/ti/Makefile @@ -12,6 +12,7 @@ k3-psil-lib-objs := k3-psil.o \ k3-psil-j721s2.o \ k3-psil-am62.o \ k3-psil-am62a.o \ - k3-psil-j784s4.o + k3-psil-j784s4.o \ + k3-psil-am62p.o obj-$(CONFIG_TI_K3_PSIL) += k3-psil-lib.o obj-$(CONFIG_TI_DMA_CROSSBAR) += dma-crossbar.o diff --git 
a/drivers/dma/ti/k3-psil-am62p.c b/drivers/dma/ti/k3-psil-am62p.c new file mode 100644 index 0000000000000000000000000000000000000000..0f338e16d9710f25998a3919f8b65bc57d12a824 --- /dev/null +++ b/drivers/dma/ti/k3-psil-am62p.c @@ -0,0 +1,325 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023 Texas Instruments Incorporated - https://www.ti.com + */ + +#include + +#include "k3-psil-priv.h" + +#define PSIL_PDMA_XY_TR(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_PDMA_XY, \ + .mapped_channel_id = -1, \ + .default_flow_id = -1, \ + }, \ + } + +#define PSIL_PDMA_XY_PKT(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_PDMA_XY, \ + .mapped_channel_id = -1, \ + .default_flow_id = -1, \ + .pkt_mode = 1, \ + }, \ + } + +#define PSIL_ETHERNET(x, ch, flow_base, flow_cnt) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_NATIVE, \ + .pkt_mode = 1, \ + .needs_epib = 1, \ + .psd_size = 16, \ + .mapped_channel_id = ch, \ + .flow_start = flow_base, \ + .flow_num = flow_cnt, \ + .default_flow_id = flow_base, \ + }, \ + } + +#define PSIL_SAUL(x, ch, flow_base, flow_cnt, default_flow, tx) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_NATIVE, \ + .pkt_mode = 1, \ + .needs_epib = 1, \ + .psd_size = 64, \ + .mapped_channel_id = ch, \ + .flow_start = flow_base, \ + .flow_num = flow_cnt, \ + .default_flow_id = default_flow, \ + .notdpkt = tx, \ + }, \ + } + +#define PSIL_PDMA_MCASP(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_PDMA_XY, \ + .pdma_acc32 = 1, \ + .pdma_burst = 1, \ + }, \ + } + +#define PSIL_CSI2RX(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_NATIVE, \ + }, \ + } + +/* PSI-L source thread IDs, used for RX (DMA_DEV_TO_MEM) */ +static struct psil_ep am62p_src_ep_map[] = { + /* SAUL */ + PSIL_SAUL(0x7504, 20, 35, 8, 35, 0), + PSIL_SAUL(0x7505, 21, 35, 8, 36, 0), + PSIL_SAUL(0x7506, 22, 43, 8, 43, 0), + PSIL_SAUL(0x7507, 23, 43, 
8, 44, 0), + /* PDMA_MAIN0 - SPI0-2 */ + PSIL_PDMA_XY_PKT(0x4300), + PSIL_PDMA_XY_PKT(0x4301), + PSIL_PDMA_XY_PKT(0x4302), + PSIL_PDMA_XY_PKT(0x4303), + PSIL_PDMA_XY_PKT(0x4304), + PSIL_PDMA_XY_PKT(0x4305), + PSIL_PDMA_XY_PKT(0x4306), + PSIL_PDMA_XY_PKT(0x4307), + PSIL_PDMA_XY_PKT(0x4308), + PSIL_PDMA_XY_PKT(0x4309), + PSIL_PDMA_XY_PKT(0x430a), + PSIL_PDMA_XY_PKT(0x430b), + /* PDMA_MAIN1 - UART0-6 */ + PSIL_PDMA_XY_PKT(0x4400), + PSIL_PDMA_XY_PKT(0x4401), + PSIL_PDMA_XY_PKT(0x4402), + PSIL_PDMA_XY_PKT(0x4403), + PSIL_PDMA_XY_PKT(0x4404), + PSIL_PDMA_XY_PKT(0x4405), + PSIL_PDMA_XY_PKT(0x4406), + /* PDMA_MAIN2 - MCASP0-2 */ + PSIL_PDMA_MCASP(0x4500), + PSIL_PDMA_MCASP(0x4501), + PSIL_PDMA_MCASP(0x4502), + /* CPSW3G */ + PSIL_ETHERNET(0x4600, 19, 19, 16), + /* CSI2RX */ + PSIL_CSI2RX(0x5000), + PSIL_CSI2RX(0x5001), + PSIL_CSI2RX(0x5002), + PSIL_CSI2RX(0x5003), + PSIL_CSI2RX(0x5004), + PSIL_CSI2RX(0x5005), + PSIL_CSI2RX(0x5006), + PSIL_CSI2RX(0x5007), + PSIL_CSI2RX(0x5008), + PSIL_CSI2RX(0x5009), + PSIL_CSI2RX(0x500a), + PSIL_CSI2RX(0x500b), + PSIL_CSI2RX(0x500c), + PSIL_CSI2RX(0x500d), + PSIL_CSI2RX(0x500e), + PSIL_CSI2RX(0x500f), + PSIL_CSI2RX(0x5010), + PSIL_CSI2RX(0x5011), + PSIL_CSI2RX(0x5012), + PSIL_CSI2RX(0x5013), + PSIL_CSI2RX(0x5014), + PSIL_CSI2RX(0x5015), + PSIL_CSI2RX(0x5016), + PSIL_CSI2RX(0x5017), + PSIL_CSI2RX(0x5018), + PSIL_CSI2RX(0x5019), + PSIL_CSI2RX(0x501a), + PSIL_CSI2RX(0x501b), + PSIL_CSI2RX(0x501c), + PSIL_CSI2RX(0x501d), + PSIL_CSI2RX(0x501e), + PSIL_CSI2RX(0x501f), + PSIL_CSI2RX(0x5000), + PSIL_CSI2RX(0x5001), + PSIL_CSI2RX(0x5002), + PSIL_CSI2RX(0x5003), + PSIL_CSI2RX(0x5004), + PSIL_CSI2RX(0x5005), + PSIL_CSI2RX(0x5006), + PSIL_CSI2RX(0x5007), + PSIL_CSI2RX(0x5008), + PSIL_CSI2RX(0x5009), + PSIL_CSI2RX(0x500a), + PSIL_CSI2RX(0x500b), + PSIL_CSI2RX(0x500c), + PSIL_CSI2RX(0x500d), + PSIL_CSI2RX(0x500e), + PSIL_CSI2RX(0x500f), + PSIL_CSI2RX(0x5010), + PSIL_CSI2RX(0x5011), + PSIL_CSI2RX(0x5012), + PSIL_CSI2RX(0x5013), + PSIL_CSI2RX(0x5014), + 
PSIL_CSI2RX(0x5015), + PSIL_CSI2RX(0x5016), + PSIL_CSI2RX(0x5017), + PSIL_CSI2RX(0x5018), + PSIL_CSI2RX(0x5019), + PSIL_CSI2RX(0x501a), + PSIL_CSI2RX(0x501b), + PSIL_CSI2RX(0x501c), + PSIL_CSI2RX(0x501d), + PSIL_CSI2RX(0x501e), + PSIL_CSI2RX(0x501f), + /* CSIRX 1-3 (only for J722S) */ + PSIL_CSI2RX(0x5100), + PSIL_CSI2RX(0x5101), + PSIL_CSI2RX(0x5102), + PSIL_CSI2RX(0x5103), + PSIL_CSI2RX(0x5104), + PSIL_CSI2RX(0x5105), + PSIL_CSI2RX(0x5106), + PSIL_CSI2RX(0x5107), + PSIL_CSI2RX(0x5108), + PSIL_CSI2RX(0x5109), + PSIL_CSI2RX(0x510a), + PSIL_CSI2RX(0x510b), + PSIL_CSI2RX(0x510c), + PSIL_CSI2RX(0x510d), + PSIL_CSI2RX(0x510e), + PSIL_CSI2RX(0x510f), + PSIL_CSI2RX(0x5110), + PSIL_CSI2RX(0x5111), + PSIL_CSI2RX(0x5112), + PSIL_CSI2RX(0x5113), + PSIL_CSI2RX(0x5114), + PSIL_CSI2RX(0x5115), + PSIL_CSI2RX(0x5116), + PSIL_CSI2RX(0x5117), + PSIL_CSI2RX(0x5118), + PSIL_CSI2RX(0x5119), + PSIL_CSI2RX(0x511a), + PSIL_CSI2RX(0x511b), + PSIL_CSI2RX(0x511c), + PSIL_CSI2RX(0x511d), + PSIL_CSI2RX(0x511e), + PSIL_CSI2RX(0x511f), + PSIL_CSI2RX(0x5200), + PSIL_CSI2RX(0x5201), + PSIL_CSI2RX(0x5202), + PSIL_CSI2RX(0x5203), + PSIL_CSI2RX(0x5204), + PSIL_CSI2RX(0x5205), + PSIL_CSI2RX(0x5206), + PSIL_CSI2RX(0x5207), + PSIL_CSI2RX(0x5208), + PSIL_CSI2RX(0x5209), + PSIL_CSI2RX(0x520a), + PSIL_CSI2RX(0x520b), + PSIL_CSI2RX(0x520c), + PSIL_CSI2RX(0x520d), + PSIL_CSI2RX(0x520e), + PSIL_CSI2RX(0x520f), + PSIL_CSI2RX(0x5210), + PSIL_CSI2RX(0x5211), + PSIL_CSI2RX(0x5212), + PSIL_CSI2RX(0x5213), + PSIL_CSI2RX(0x5214), + PSIL_CSI2RX(0x5215), + PSIL_CSI2RX(0x5216), + PSIL_CSI2RX(0x5217), + PSIL_CSI2RX(0x5218), + PSIL_CSI2RX(0x5219), + PSIL_CSI2RX(0x521a), + PSIL_CSI2RX(0x521b), + PSIL_CSI2RX(0x521c), + PSIL_CSI2RX(0x521d), + PSIL_CSI2RX(0x521e), + PSIL_CSI2RX(0x521f), + PSIL_CSI2RX(0x5300), + PSIL_CSI2RX(0x5301), + PSIL_CSI2RX(0x5302), + PSIL_CSI2RX(0x5303), + PSIL_CSI2RX(0x5304), + PSIL_CSI2RX(0x5305), + PSIL_CSI2RX(0x5306), + PSIL_CSI2RX(0x5307), + PSIL_CSI2RX(0x5308), + PSIL_CSI2RX(0x5309), + 
PSIL_CSI2RX(0x530a), + PSIL_CSI2RX(0x530b), + PSIL_CSI2RX(0x530c), + PSIL_CSI2RX(0x530d), + PSIL_CSI2RX(0x530e), + PSIL_CSI2RX(0x530f), + PSIL_CSI2RX(0x5310), + PSIL_CSI2RX(0x5311), + PSIL_CSI2RX(0x5312), + PSIL_CSI2RX(0x5313), + PSIL_CSI2RX(0x5314), + PSIL_CSI2RX(0x5315), + PSIL_CSI2RX(0x5316), + PSIL_CSI2RX(0x5317), + PSIL_CSI2RX(0x5318), + PSIL_CSI2RX(0x5319), + PSIL_CSI2RX(0x531a), + PSIL_CSI2RX(0x531b), + PSIL_CSI2RX(0x531c), + PSIL_CSI2RX(0x531d), + PSIL_CSI2RX(0x531e), + PSIL_CSI2RX(0x531f), +}; + +/* PSI-L destination thread IDs, used for TX (DMA_MEM_TO_DEV) */ +static struct psil_ep am62p_dst_ep_map[] = { + /* SAUL */ + PSIL_SAUL(0xf500, 27, 83, 8, 83, 1), + PSIL_SAUL(0xf501, 28, 91, 8, 91, 1), + /* PDMA_MAIN0 - SPI0-2 */ + PSIL_PDMA_XY_PKT(0xc300), + PSIL_PDMA_XY_PKT(0xc301), + PSIL_PDMA_XY_PKT(0xc302), + PSIL_PDMA_XY_PKT(0xc303), + PSIL_PDMA_XY_PKT(0xc304), + PSIL_PDMA_XY_PKT(0xc305), + PSIL_PDMA_XY_PKT(0xc306), + PSIL_PDMA_XY_PKT(0xc307), + PSIL_PDMA_XY_PKT(0xc308), + PSIL_PDMA_XY_PKT(0xc309), + PSIL_PDMA_XY_PKT(0xc30a), + PSIL_PDMA_XY_PKT(0xc30b), + /* PDMA_MAIN1 - UART0-6 */ + PSIL_PDMA_XY_PKT(0xc400), + PSIL_PDMA_XY_PKT(0xc401), + PSIL_PDMA_XY_PKT(0xc402), + PSIL_PDMA_XY_PKT(0xc403), + PSIL_PDMA_XY_PKT(0xc404), + PSIL_PDMA_XY_PKT(0xc405), + PSIL_PDMA_XY_PKT(0xc406), + /* PDMA_MAIN2 - MCASP0-2 */ + PSIL_PDMA_MCASP(0xc500), + PSIL_PDMA_MCASP(0xc501), + PSIL_PDMA_MCASP(0xc502), + /* CPSW3G */ + PSIL_ETHERNET(0xc600, 19, 19, 8), + PSIL_ETHERNET(0xc601, 20, 27, 8), + PSIL_ETHERNET(0xc602, 21, 35, 8), + PSIL_ETHERNET(0xc603, 22, 43, 8), + PSIL_ETHERNET(0xc604, 23, 51, 8), + PSIL_ETHERNET(0xc605, 24, 59, 8), + PSIL_ETHERNET(0xc606, 25, 67, 8), + PSIL_ETHERNET(0xc607, 26, 75, 8), +}; + +struct psil_ep_map am62p_ep_map = { + .name = "am62p", + .src = am62p_src_ep_map, + .src_count = ARRAY_SIZE(am62p_src_ep_map), + .dst = am62p_dst_ep_map, + .dst_count = ARRAY_SIZE(am62p_dst_ep_map), +}; diff --git a/drivers/dma/ti/k3-psil-priv.h 
b/drivers/dma/ti/k3-psil-priv.h index c383723d1c8f662b9c0981d1b7b76e0ccf12196f..a577be97e3447148cfc941ca4afe6c6c17fc7697 100644 --- a/drivers/dma/ti/k3-psil-priv.h +++ b/drivers/dma/ti/k3-psil-priv.h @@ -45,5 +45,6 @@ extern struct psil_ep_map j721s2_ep_map; extern struct psil_ep_map am62_ep_map; extern struct psil_ep_map am62a_ep_map; extern struct psil_ep_map j784s4_ep_map; +extern struct psil_ep_map am62p_ep_map; #endif /* K3_PSIL_PRIV_H_ */ diff --git a/drivers/dma/ti/k3-psil.c b/drivers/dma/ti/k3-psil.c index c11389d67a3f0f2ff75f300c8f2dc914d27a1570..25148d9524720372ca8098f6cfe68cc59641d29a 100644 --- a/drivers/dma/ti/k3-psil.c +++ b/drivers/dma/ti/k3-psil.c @@ -26,6 +26,8 @@ static const struct soc_device_attribute k3_soc_devices[] = { { .family = "AM62X", .data = &am62_ep_map }, { .family = "AM62AX", .data = &am62a_ep_map }, { .family = "J784S4", .data = &j784s4_ep_map }, + { .family = "AM62PX", .data = &am62p_ep_map }, + { .family = "J722S", .data = &am62p_ep_map }, { /* sentinel */ } }; diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c index 30fd2f386f36a1ada7fc5cbe6a4359eb7019a559..2841a539c264891cde8b8166d51d5bbf885a5d85 100644 --- a/drivers/dma/ti/k3-udma.c +++ b/drivers/dma/ti/k3-udma.c @@ -4441,6 +4441,8 @@ static const struct soc_device_attribute k3_soc_devices[] = { { .family = "AM62X", .data = &am64_soc_data }, { .family = "AM62AX", .data = &am64_soc_data }, { .family = "J784S4", .data = &j721e_soc_data }, + { .family = "AM62PX", .data = &am64_soc_data }, + { .family = "J722S", .data = &am64_soc_data }, { /* sentinel */ } }; diff --git a/drivers/dma/uniphier-mdmac.c b/drivers/dma/uniphier-mdmac.c index 618839df074866c4d9a2feca56a9f4183baea3cb..ad7125f6e2ca8e4452195e3ad4cedf4ee154a5b3 100644 --- a/drivers/dma/uniphier-mdmac.c +++ b/drivers/dma/uniphier-mdmac.c @@ -453,7 +453,7 @@ disable_clk: return ret; } -static int uniphier_mdmac_remove(struct platform_device *pdev) +static void uniphier_mdmac_remove(struct platform_device *pdev) { 
struct uniphier_mdmac_device *mdev = platform_get_drvdata(pdev); struct dma_chan *chan; @@ -468,16 +468,21 @@ static int uniphier_mdmac_remove(struct platform_device *pdev) */ list_for_each_entry(chan, &mdev->ddev.channels, device_node) { ret = dmaengine_terminate_sync(chan); - if (ret) - return ret; + if (ret) { + /* + * This results in resource leakage and maybe also + * use-after-free errors as e.g. *mdev is kfreed. + */ + dev_alert(&pdev->dev, "Failed to terminate channel %d (%pe)\n", + chan->chan_id, ERR_PTR(ret)); + return; + } uniphier_mdmac_free_chan_resources(chan); } of_dma_controller_free(pdev->dev.of_node); dma_async_device_unregister(&mdev->ddev); clk_disable_unprepare(mdev->clk); - - return 0; } static const struct of_device_id uniphier_mdmac_match[] = { @@ -488,7 +493,7 @@ MODULE_DEVICE_TABLE(of, uniphier_mdmac_match); static struct platform_driver uniphier_mdmac_driver = { .probe = uniphier_mdmac_probe, - .remove = uniphier_mdmac_remove, + .remove_new = uniphier_mdmac_remove, .driver = { .name = "uniphier-mio-dmac", .of_match_table = uniphier_mdmac_match, diff --git a/drivers/dma/uniphier-xdmac.c b/drivers/dma/uniphier-xdmac.c index 3a8ee2b173b52e83b775bbe27d5f715a205aef87..3ce2dc2ad9de4290887e6c5344206d0e747b0c7f 100644 --- a/drivers/dma/uniphier-xdmac.c +++ b/drivers/dma/uniphier-xdmac.c @@ -563,7 +563,7 @@ out_unregister_dmac: return ret; } -static int uniphier_xdmac_remove(struct platform_device *pdev) +static void uniphier_xdmac_remove(struct platform_device *pdev) { struct uniphier_xdmac_device *xdev = platform_get_drvdata(pdev); struct dma_device *ddev = &xdev->ddev; @@ -579,15 +579,20 @@ static int uniphier_xdmac_remove(struct platform_device *pdev) */ list_for_each_entry(chan, &ddev->channels, device_node) { ret = dmaengine_terminate_sync(chan); - if (ret) - return ret; + if (ret) { + /* + * This results in resource leakage and maybe also + * use-after-free errors as e.g. *xdev is kfreed. 
+ */ + dev_alert(&pdev->dev, "Failed to terminate channel %d (%pe)\n", + chan->chan_id, ERR_PTR(ret)); + return; + } uniphier_xdmac_free_chan_resources(chan); } of_dma_controller_free(pdev->dev.of_node); dma_async_device_unregister(ddev); - - return 0; } static const struct of_device_id uniphier_xdmac_match[] = { @@ -598,7 +603,7 @@ MODULE_DEVICE_TABLE(of, uniphier_xdmac_match); static struct platform_driver uniphier_xdmac_driver = { .probe = uniphier_xdmac_probe, - .remove = uniphier_xdmac_remove, + .remove_new = uniphier_xdmac_remove, .driver = { .name = "uniphier-xdmac", .of_match_table = uniphier_xdmac_match, diff --git a/drivers/dma/xilinx/xdma-regs.h b/drivers/dma/xilinx/xdma-regs.h index e641a5083e14b081a7871b3a19bda0c57601d6b5..98f5f6fb9ff9c771270c64c364265f72fd7b216d 100644 --- a/drivers/dma/xilinx/xdma-regs.h +++ b/drivers/dma/xilinx/xdma-regs.h @@ -64,9 +64,10 @@ struct xdma_hw_desc { __le64 next_desc; }; -#define XDMA_DESC_SIZE sizeof(struct xdma_hw_desc) -#define XDMA_DESC_BLOCK_SIZE (XDMA_DESC_SIZE * XDMA_DESC_ADJACENT) -#define XDMA_DESC_BLOCK_ALIGN 4096 +#define XDMA_DESC_SIZE sizeof(struct xdma_hw_desc) +#define XDMA_DESC_BLOCK_SIZE (XDMA_DESC_SIZE * XDMA_DESC_ADJACENT) +#define XDMA_DESC_BLOCK_ALIGN 32 +#define XDMA_DESC_BLOCK_BOUNDARY 4096 /* * Channel registers @@ -76,6 +77,7 @@ struct xdma_hw_desc { #define XDMA_CHAN_CONTROL_W1S 0x8 #define XDMA_CHAN_CONTROL_W1C 0xc #define XDMA_CHAN_STATUS 0x40 +#define XDMA_CHAN_STATUS_RC 0x44 #define XDMA_CHAN_COMPLETED_DESC 0x48 #define XDMA_CHAN_ALIGNMENTS 0x4c #define XDMA_CHAN_INTR_ENABLE 0x90 @@ -101,6 +103,7 @@ struct xdma_hw_desc { #define CHAN_CTRL_IE_MAGIC_STOPPED BIT(4) #define CHAN_CTRL_IE_IDLE_STOPPED BIT(6) #define CHAN_CTRL_IE_READ_ERROR GENMASK(13, 9) +#define CHAN_CTRL_IE_WRITE_ERROR GENMASK(18, 14) #define CHAN_CTRL_IE_DESC_ERROR GENMASK(23, 19) #define CHAN_CTRL_NON_INCR_ADDR BIT(25) #define CHAN_CTRL_POLL_MODE_WB BIT(26) @@ -111,8 +114,17 @@ struct xdma_hw_desc { 
CHAN_CTRL_IE_DESC_ALIGN_MISMATCH | \ CHAN_CTRL_IE_MAGIC_STOPPED | \ CHAN_CTRL_IE_READ_ERROR | \ + CHAN_CTRL_IE_WRITE_ERROR | \ CHAN_CTRL_IE_DESC_ERROR) +#define XDMA_CHAN_STATUS_MASK CHAN_CTRL_START + +#define XDMA_CHAN_ERROR_MASK (CHAN_CTRL_IE_DESC_ALIGN_MISMATCH | \ + CHAN_CTRL_IE_MAGIC_STOPPED | \ + CHAN_CTRL_IE_READ_ERROR | \ + CHAN_CTRL_IE_WRITE_ERROR | \ + CHAN_CTRL_IE_DESC_ERROR) + /* bits of the channel interrupt enable mask */ #define CHAN_IM_DESC_ERROR BIT(19) #define CHAN_IM_READ_ERROR BIT(9) @@ -134,18 +146,6 @@ struct xdma_hw_desc { #define XDMA_SGDMA_DESC_ADJ 0x4088 #define XDMA_SGDMA_DESC_CREDIT 0x408c -/* bits of the SG DMA control register */ -#define XDMA_CTRL_RUN_STOP BIT(0) -#define XDMA_CTRL_IE_DESC_STOPPED BIT(1) -#define XDMA_CTRL_IE_DESC_COMPLETED BIT(2) -#define XDMA_CTRL_IE_DESC_ALIGN_MISMATCH BIT(3) -#define XDMA_CTRL_IE_MAGIC_STOPPED BIT(4) -#define XDMA_CTRL_IE_IDLE_STOPPED BIT(6) -#define XDMA_CTRL_IE_READ_ERROR GENMASK(13, 9) -#define XDMA_CTRL_IE_DESC_ERROR GENMASK(23, 19) -#define XDMA_CTRL_NON_INCR_ADDR BIT(25) -#define XDMA_CTRL_POLL_MODE_WB BIT(26) - /* * interrupt registers */ diff --git a/drivers/dma/xilinx/xdma.c b/drivers/dma/xilinx/xdma.c index 84a88029226fdc16e423d8162408eea40cc22d00..170017ff2aad6e58c8d0ee4ea6e7d42c15c8202c 100644 --- a/drivers/dma/xilinx/xdma.c +++ b/drivers/dma/xilinx/xdma.c @@ -78,27 +78,31 @@ struct xdma_chan { * @vdesc: Virtual DMA descriptor * @chan: DMA channel pointer * @dir: Transferring direction of the request - * @dev_addr: Physical address on DMA device side * @desc_blocks: Hardware descriptor blocks * @dblk_num: Number of hardware descriptor blocks * @desc_num: Number of hardware descriptors * @completed_desc_num: Completed hardware descriptors * @cyclic: Cyclic transfer vs. 
scatter-gather + * @interleaved_dma: Interleaved DMA transfer * @periods: Number of periods in the cyclic transfer * @period_size: Size of a period in bytes in cyclic transfers + * @frames_left: Number of frames left in interleaved DMA transfer + * @error: tx error flag */ struct xdma_desc { struct virt_dma_desc vdesc; struct xdma_chan *chan; enum dma_transfer_direction dir; - u64 dev_addr; struct xdma_desc_block *desc_blocks; u32 dblk_num; u32 desc_num; u32 completed_desc_num; bool cyclic; + bool interleaved_dma; u32 periods; u32 period_size; + u32 frames_left; + bool error; }; #define XDMA_DEV_STATUS_REG_DMA BIT(0) @@ -276,6 +280,7 @@ xdma_alloc_desc(struct xdma_chan *chan, u32 desc_num, bool cyclic) sw_desc->chan = chan; sw_desc->desc_num = desc_num; sw_desc->cyclic = cyclic; + sw_desc->error = false; dblk_num = DIV_ROUND_UP(desc_num, XDMA_DESC_ADJACENT); sw_desc->desc_blocks = kcalloc(dblk_num, sizeof(*sw_desc->desc_blocks), GFP_NOWAIT); @@ -371,6 +376,31 @@ static int xdma_xfer_start(struct xdma_chan *xchan) return ret; xchan->busy = true; + + return 0; +} + +/** + * xdma_xfer_stop - Stop DMA transfer + * @xchan: DMA channel pointer + */ +static int xdma_xfer_stop(struct xdma_chan *xchan) +{ + int ret; + u32 val; + struct xdma_device *xdev = xchan->xdev_hdl; + + /* clear run stop bit to prevent any further auto-triggering */ + ret = regmap_write(xdev->rmap, xchan->base + XDMA_CHAN_CONTROL_W1C, + CHAN_CTRL_RUN_STOP); + if (ret) + return ret; + + /* Clear the channel status register */ + ret = regmap_read(xdev->rmap, xchan->base + XDMA_CHAN_STATUS_RC, &val); + if (ret) + return ret; + return 0; } @@ -475,6 +505,84 @@ static void xdma_issue_pending(struct dma_chan *chan) spin_unlock_irqrestore(&xdma_chan->vchan.lock, flags); } +/** + * xdma_terminate_all - Terminate all transactions + * @chan: DMA channel pointer + */ +static int xdma_terminate_all(struct dma_chan *chan) +{ + struct xdma_chan *xdma_chan = to_xdma_chan(chan); + struct virt_dma_desc *vd; + unsigned 
long flags; + LIST_HEAD(head); + + xdma_xfer_stop(xdma_chan); + + spin_lock_irqsave(&xdma_chan->vchan.lock, flags); + + xdma_chan->busy = false; + vd = vchan_next_desc(&xdma_chan->vchan); + if (vd) { + list_del(&vd->node); + dma_cookie_complete(&vd->tx); + vchan_terminate_vdesc(vd); + } + vchan_get_all_descriptors(&xdma_chan->vchan, &head); + list_splice_tail(&head, &xdma_chan->vchan.desc_terminated); + + spin_unlock_irqrestore(&xdma_chan->vchan.lock, flags); + + return 0; +} + +/** + * xdma_synchronize - Synchronize terminated transactions + * @chan: DMA channel pointer + */ +static void xdma_synchronize(struct dma_chan *chan) +{ + struct xdma_chan *xdma_chan = to_xdma_chan(chan); + + vchan_synchronize(&xdma_chan->vchan); +} + +/** + * xdma_fill_descs - Fill hardware descriptors with contiguous memory block addresses + * @sw_desc: tx descriptor state container + * @src_addr: Value for a ->src_addr field of a first descriptor + * @dst_addr: Value for a ->dst_addr field of a first descriptor + * @size: Total size of a contiguous memory block + * @filled_descs_num: Number of filled hardware descriptors for corresponding sw_desc + */ +static inline u32 xdma_fill_descs(struct xdma_desc *sw_desc, u64 src_addr, + u64 dst_addr, u32 size, u32 filled_descs_num) +{ + u32 left = size, len, desc_num = filled_descs_num; + struct xdma_desc_block *dblk; + struct xdma_hw_desc *desc; + + dblk = sw_desc->desc_blocks + (desc_num / XDMA_DESC_ADJACENT); + desc = dblk->virt_addr; + desc += desc_num & XDMA_DESC_ADJACENT_MASK; + do { + len = min_t(u32, left, XDMA_DESC_BLEN_MAX); + /* set hardware descriptor */ + desc->bytes = cpu_to_le32(len); + desc->src_addr = cpu_to_le64(src_addr); + desc->dst_addr = cpu_to_le64(dst_addr); + if (!(++desc_num & XDMA_DESC_ADJACENT_MASK)) + desc = (++dblk)->virt_addr; + else + desc++; + + src_addr += len; + dst_addr += len; + left -= len; + } while (left); + + return desc_num - filled_descs_num; +} + /** * xdma_prep_device_sg - prepare a descriptor for a 
DMA transaction * @chan: DMA channel pointer @@ -491,13 +599,10 @@ xdma_prep_device_sg(struct dma_chan *chan, struct scatterlist *sgl, { struct xdma_chan *xdma_chan = to_xdma_chan(chan); struct dma_async_tx_descriptor *tx_desc; - u32 desc_num = 0, i, len, rest; - struct xdma_desc_block *dblk; - struct xdma_hw_desc *desc; struct xdma_desc *sw_desc; - u64 dev_addr, *src, *dst; + u32 desc_num = 0, i; + u64 addr, dev_addr, *src, *dst; struct scatterlist *sg; - u64 addr; for_each_sg(sgl, sg, sg_len, i) desc_num += DIV_ROUND_UP(sg_dma_len(sg), XDMA_DESC_BLEN_MAX); @@ -506,6 +611,8 @@ xdma_prep_device_sg(struct dma_chan *chan, struct scatterlist *sgl, if (!sw_desc) return NULL; sw_desc->dir = dir; + sw_desc->cyclic = false; + sw_desc->interleaved_dma = false; if (dir == DMA_MEM_TO_DEV) { dev_addr = xdma_chan->cfg.dst_addr; @@ -517,32 +624,11 @@ xdma_prep_device_sg(struct dma_chan *chan, struct scatterlist *sgl, dst = &addr; } - dblk = sw_desc->desc_blocks; - desc = dblk->virt_addr; - desc_num = 1; + desc_num = 0; for_each_sg(sgl, sg, sg_len, i) { addr = sg_dma_address(sg); - rest = sg_dma_len(sg); - - do { - len = min_t(u32, rest, XDMA_DESC_BLEN_MAX); - /* set hardware descriptor */ - desc->bytes = cpu_to_le32(len); - desc->src_addr = cpu_to_le64(*src); - desc->dst_addr = cpu_to_le64(*dst); - - if (!(desc_num & XDMA_DESC_ADJACENT_MASK)) { - dblk++; - desc = dblk->virt_addr; - } else { - desc++; - } - - desc_num++; - dev_addr += len; - addr += len; - rest -= len; - } while (rest); + desc_num += xdma_fill_descs(sw_desc, *src, *dst, sg_dma_len(sg), desc_num); + dev_addr += sg_dma_len(sg); } tx_desc = vchan_tx_prep(&xdma_chan->vchan, &sw_desc->vdesc, flags); @@ -576,9 +662,9 @@ xdma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t address, struct xdma_device *xdev = xdma_chan->xdev_hdl; unsigned int periods = size / period_size; struct dma_async_tx_descriptor *tx_desc; - struct xdma_desc_block *dblk; - struct xdma_hw_desc *desc; struct xdma_desc *sw_desc; + u64 addr, 
dev_addr, *src, *dst; + u32 desc_num; unsigned int i; /* @@ -602,22 +688,23 @@ xdma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t address, sw_desc->periods = periods; sw_desc->period_size = period_size; sw_desc->dir = dir; + sw_desc->interleaved_dma = false; - dblk = sw_desc->desc_blocks; - desc = dblk->virt_addr; + addr = address; + if (dir == DMA_MEM_TO_DEV) { + dev_addr = xdma_chan->cfg.dst_addr; + src = &addr; + dst = &dev_addr; + } else { + dev_addr = xdma_chan->cfg.src_addr; + src = &dev_addr; + dst = &addr; + } - /* fill hardware descriptor */ + desc_num = 0; for (i = 0; i < periods; i++) { - desc->bytes = cpu_to_le32(period_size); - if (dir == DMA_MEM_TO_DEV) { - desc->src_addr = cpu_to_le64(address + i * period_size); - desc->dst_addr = cpu_to_le64(xdma_chan->cfg.dst_addr); - } else { - desc->src_addr = cpu_to_le64(xdma_chan->cfg.src_addr); - desc->dst_addr = cpu_to_le64(address + i * period_size); - } - - desc++; + desc_num += xdma_fill_descs(sw_desc, *src, *dst, period_size, desc_num); + addr += period_size; /* addr is cumulative: advance exactly one period per iteration */ } tx_desc = vchan_tx_prep(&xdma_chan->vchan, &sw_desc->vdesc, flags); @@ -632,6 +719,57 @@ failed: return NULL; } +/** + * xdma_prep_interleaved_dma - Prepare virtual descriptor for interleaved DMA transfers + * @chan: DMA channel + * @xt: DMA transfer template + * @flags: tx flags + */ +static struct dma_async_tx_descriptor * +xdma_prep_interleaved_dma(struct dma_chan *chan, + struct dma_interleaved_template *xt, + unsigned long flags) +{ + int i; + u32 desc_num = 0, period_size = 0; + struct dma_async_tx_descriptor *tx_desc; + struct xdma_chan *xchan = to_xdma_chan(chan); + struct xdma_desc *sw_desc; + u64 src_addr, dst_addr; + + for (i = 0; i < xt->frame_size; ++i) + desc_num += DIV_ROUND_UP(xt->sgl[i].size, XDMA_DESC_BLEN_MAX); + + sw_desc = xdma_alloc_desc(xchan, desc_num, false); + if (!sw_desc) + return NULL; + sw_desc->dir = xt->dir; + sw_desc->interleaved_dma = true; + sw_desc->cyclic = flags & DMA_PREP_REPEAT; + sw_desc->frames_left
= xt->numf; + sw_desc->periods = xt->numf; + + desc_num = 0; + src_addr = xt->src_start; + dst_addr = xt->dst_start; + for (i = 0; i < xt->frame_size; ++i) { + desc_num += xdma_fill_descs(sw_desc, src_addr, dst_addr, xt->sgl[i].size, desc_num); + src_addr += dmaengine_get_src_icg(xt, &xt->sgl[i]) + (xt->src_inc ? + xt->sgl[i].size : 0); + dst_addr += dmaengine_get_dst_icg(xt, &xt->sgl[i]) + (xt->dst_inc ? + xt->sgl[i].size : 0); + period_size += xt->sgl[i].size; + } + sw_desc->period_size = period_size; + + tx_desc = vchan_tx_prep(&xchan->vchan, &sw_desc->vdesc, flags); + if (tx_desc) + return tx_desc; + + xdma_free_desc(&sw_desc->vdesc); + return NULL; +} + /** * xdma_device_config - Configure the DMA channel * @chan: DMA channel @@ -677,9 +815,8 @@ static int xdma_alloc_chan_resources(struct dma_chan *chan) return -EINVAL; } - xdma_chan->desc_pool = dma_pool_create(dma_chan_name(chan), - dev, XDMA_DESC_BLOCK_SIZE, - XDMA_DESC_BLOCK_ALIGN, 0); + xdma_chan->desc_pool = dma_pool_create(dma_chan_name(chan), dev, XDMA_DESC_BLOCK_SIZE, + XDMA_DESC_BLOCK_ALIGN, XDMA_DESC_BLOCK_BOUNDARY); if (!xdma_chan->desc_pool) { xdma_err(xdev, "unable to allocate descriptor pool"); return -ENOMEM; @@ -706,20 +843,20 @@ static enum dma_status xdma_tx_status(struct dma_chan *chan, dma_cookie_t cookie spin_lock_irqsave(&xdma_chan->vchan.lock, flags); vd = vchan_find_desc(&xdma_chan->vchan, cookie); - if (vd) - desc = to_xdma_desc(vd); - if (!desc || !desc->cyclic) { - spin_unlock_irqrestore(&xdma_chan->vchan.lock, flags); - return ret; - } - - period_idx = desc->completed_desc_num % desc->periods; - residue = (desc->periods - period_idx) * desc->period_size; + if (!vd) + goto out; + desc = to_xdma_desc(vd); + if (desc->error) { + ret = DMA_ERROR; + } else if (desc->cyclic) { + period_idx = desc->completed_desc_num % desc->periods; + residue = (desc->periods - period_idx) * desc->period_size; + dma_set_residue(state, residue); + } +out: spin_unlock_irqrestore(&xdma_chan->vchan.lock, 
flags); - dma_set_residue(state, residue); - return ret; } @@ -732,11 +869,12 @@ static irqreturn_t xdma_channel_isr(int irq, void *dev_id) { struct xdma_chan *xchan = dev_id; u32 complete_desc_num = 0; - struct xdma_device *xdev; - struct virt_dma_desc *vd; + struct xdma_device *xdev = xchan->xdev_hdl; + struct virt_dma_desc *vd, *next_vd; struct xdma_desc *desc; int ret; u32 st; + bool repeat_tx; spin_lock(&xchan->vchan.lock); @@ -745,45 +883,76 @@ static irqreturn_t xdma_channel_isr(int irq, void *dev_id) if (!vd) goto out; - xchan->busy = false; + /* Clear-on-read the status register */ + ret = regmap_read(xdev->rmap, xchan->base + XDMA_CHAN_STATUS_RC, &st); + if (ret) + goto out; + desc = to_xdma_desc(vd); - xdev = xchan->xdev_hdl; + + st &= XDMA_CHAN_STATUS_MASK; + if ((st & XDMA_CHAN_ERROR_MASK) || + !(st & (CHAN_CTRL_IE_DESC_COMPLETED | CHAN_CTRL_IE_DESC_STOPPED))) { + desc->error = true; + xdma_err(xdev, "channel error, status register value: 0x%x", st); + goto out; + } ret = regmap_read(xdev->rmap, xchan->base + XDMA_CHAN_COMPLETED_DESC, &complete_desc_num); if (ret) goto out; - desc->completed_desc_num += complete_desc_num; + if (desc->interleaved_dma) { + xchan->busy = false; + desc->completed_desc_num += complete_desc_num; + if (complete_desc_num == XDMA_DESC_BLOCK_NUM * XDMA_DESC_ADJACENT) { + xdma_xfer_start(xchan); + goto out; + } - if (desc->cyclic) { - ret = regmap_read(xdev->rmap, xchan->base + XDMA_CHAN_STATUS, - &st); - if (ret) + /* last desc of any frame */ + desc->frames_left--; + if (desc->frames_left) + goto out; + + /* last desc of the last frame */ + repeat_tx = vd->tx.flags & DMA_PREP_REPEAT; + next_vd = list_first_entry_or_null(&vd->node, struct virt_dma_desc, node); + if (next_vd) + repeat_tx = repeat_tx && !(next_vd->tx.flags & DMA_PREP_LOAD_EOT); + if (repeat_tx) { + desc->frames_left = desc->periods; + desc->completed_desc_num = 0; + vchan_cyclic_callback(vd); + } else { + list_del(&vd->node); + vchan_cookie_complete(vd); + } + /* 
start (or continue) the tx of a first desc on the vc.desc_issued list, if any */ + xdma_xfer_start(xchan); + } else if (!desc->cyclic) { + xchan->busy = false; + desc->completed_desc_num += complete_desc_num; + + /* if all data blocks are transferred, remove and complete the request */ + if (desc->completed_desc_num == desc->desc_num) { + list_del(&vd->node); + vchan_cookie_complete(vd); goto out; + } - regmap_write(xdev->rmap, xchan->base + XDMA_CHAN_STATUS, st); + if (desc->completed_desc_num > desc->desc_num || + complete_desc_num != XDMA_DESC_BLOCK_NUM * XDMA_DESC_ADJACENT) + goto out; + /* transfer the rest of data */ + xdma_xfer_start(xchan); + } else { + desc->completed_desc_num = complete_desc_num; vchan_cyclic_callback(vd); - goto out; - } - - /* - * if all data blocks are transferred, remove and complete the request - */ - if (desc->completed_desc_num == desc->desc_num) { - list_del(&vd->node); - vchan_cookie_complete(vd); - goto out; } - if (desc->completed_desc_num > desc->desc_num || - complete_desc_num != XDMA_DESC_BLOCK_NUM * XDMA_DESC_ADJACENT) - goto out; - - /* transfer the rest of data (SG only) */ - xdma_xfer_start(xchan); - out: spin_unlock(&xchan->vchan.lock); return IRQ_HANDLED; @@ -1080,6 +1249,9 @@ static int xdma_probe(struct platform_device *pdev) dma_cap_set(DMA_SLAVE, xdev->dma_dev.cap_mask); dma_cap_set(DMA_PRIVATE, xdev->dma_dev.cap_mask); dma_cap_set(DMA_CYCLIC, xdev->dma_dev.cap_mask); + dma_cap_set(DMA_INTERLEAVE, xdev->dma_dev.cap_mask); + dma_cap_set(DMA_REPEAT, xdev->dma_dev.cap_mask); + dma_cap_set(DMA_LOAD_EOT, xdev->dma_dev.cap_mask); xdev->dma_dev.dev = &pdev->dev; xdev->dma_dev.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT; @@ -1089,10 +1261,13 @@ static int xdma_probe(struct platform_device *pdev) xdev->dma_dev.device_prep_slave_sg = xdma_prep_device_sg; xdev->dma_dev.device_config = xdma_device_config; xdev->dma_dev.device_issue_pending = xdma_issue_pending; + xdev->dma_dev.device_terminate_all = 
xdma_terminate_all; + xdev->dma_dev.device_synchronize = xdma_synchronize; xdev->dma_dev.filter.map = pdata->device_map; xdev->dma_dev.filter.mapcnt = pdata->device_map_cnt; xdev->dma_dev.filter.fn = xdma_filter_fn; xdev->dma_dev.device_prep_dma_cyclic = xdma_prep_dma_cyclic; + xdev->dma_dev.device_prep_interleaved_dma = xdma_prep_interleaved_dma; ret = dma_async_device_register(&xdev->dma_dev); if (ret) { diff --git a/drivers/dma/xilinx/xilinx_dpdma.c b/drivers/dma/xilinx/xilinx_dpdma.c index 69587d85a7cd20417b83be157aa4134784eae389..b82815e64d24e8352ac025bc1bd52de497530d66 100644 --- a/drivers/dma/xilinx/xilinx_dpdma.c +++ b/drivers/dma/xilinx/xilinx_dpdma.c @@ -309,7 +309,7 @@ static ssize_t xilinx_dpdma_debugfs_desc_done_irq_read(char *buf) out_str_len = strlen(XILINX_DPDMA_DEBUGFS_UINT16_MAX_STR); out_str_len = min_t(size_t, XILINX_DPDMA_DEBUGFS_READ_MAX_SIZE, - out_str_len); + out_str_len + 1); snprintf(buf, out_str_len, "%d", dpdma_debugfs.xilinx_dpdma_irq_done_count); diff --git a/drivers/firmware/sysfb.c b/drivers/firmware/sysfb.c index 19706bd2642adbec408cbba3bf8cf8bda4b51a6f..82fcfd29bc4d29116b051c946edb9b6535fd78ac 100644 --- a/drivers/firmware/sysfb.c +++ b/drivers/firmware/sysfb.c @@ -71,7 +71,7 @@ EXPORT_SYMBOL_GPL(sysfb_disable); static __init int sysfb_init(void) { - const struct screen_info *si = &screen_info; + struct screen_info *si = &screen_info; struct simplefb_platform_data mode; const char *name; bool compatible; @@ -119,18 +119,6 @@ static __init int sysfb_init(void) if (ret) goto err; - /* - * The firmware framebuffer is now maintained by the created - * device. Disable screen_info after we've consumed it. Prevents - * invalid access during kexec reboots. - * - * TODO: Vgacon still relies on the global screen_info. Make - * vgacon work with the platform device, so we can clear - * the screen_info unconditionally. 
- */ - if (strcmp(name, "platform-framebuffer")) - screen_info.orig_video_isVGA = 0; - goto unlock_mutex; err: platform_device_put(pd); diff --git a/drivers/gpio/gpio-en7523.c b/drivers/gpio/gpio-en7523.c index f836a8db4c1d21f8c44370246e8ad97bfedb064f..69834db2c1cf26be379c0deca38dda889202f706 100644 --- a/drivers/gpio/gpio-en7523.c +++ b/drivers/gpio/gpio-en7523.c @@ -12,11 +12,11 @@ #define AIROHA_GPIO_MAX 32 /** - * airoha_gpio_ctrl - Airoha GPIO driver data + * struct airoha_gpio_ctrl - Airoha GPIO driver data * @gc: Associated gpio_chip instance. * @data: The data register. - * @dir0: The direction register for the lower 16 pins. - * @dir1: The direction register for the higher 16 pins. + * @dir: [0] The direction register for the lower 16 pins. + * [1]: The direction register for the higher 16 pins. * @output: The output enable register. */ struct airoha_gpio_ctrl { diff --git a/drivers/gpio/gpio-mlxbf3.c b/drivers/gpio/gpio-mlxbf3.c index 7a3e1760fc5b7d1a754d25d05df1bb30fbbad396..d5906d419b0ab996a5286d8cc411929385bf2c4b 100644 --- a/drivers/gpio/gpio-mlxbf3.c +++ b/drivers/gpio/gpio-mlxbf3.c @@ -215,6 +215,8 @@ static int mlxbf3_gpio_probe(struct platform_device *pdev) gs->gpio_clr_io + MLXBF_GPIO_FW_DATA_OUT_CLEAR, gs->gpio_set_io + MLXBF_GPIO_FW_OUTPUT_ENABLE_SET, gs->gpio_clr_io + MLXBF_GPIO_FW_OUTPUT_ENABLE_CLEAR, 0); + if (ret) + return dev_err_probe(dev, ret, "%s: bgpio_init() failed", __func__); gc->request = gpiochip_generic_request; gc->free = gpiochip_generic_free; diff --git a/drivers/gpio/gpio-rtd.c b/drivers/gpio/gpio-rtd.c index a7939bd0aa566e94ac5093e7fc494b7535623b9a..bf7f008f58d703347cba14f35c19f5798ee3a949 100644 --- a/drivers/gpio/gpio-rtd.c +++ b/drivers/gpio/gpio-rtd.c @@ -525,18 +525,21 @@ static int rtd_gpio_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct gpio_irq_chip *irq_chip; struct rtd_gpio *data; + int ret; data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; - data->irqs[0] = 
platform_get_irq(pdev, 0); - if (data->irqs[0] < 0) - return data->irqs[0]; + ret = platform_get_irq(pdev, 0); + if (ret < 0) + return ret; + data->irqs[0] = ret; - data->irqs[1] = platform_get_irq(pdev, 1); - if (data->irqs[1] < 0) - return data->irqs[1]; + ret = platform_get_irq(pdev, 1); + if (ret < 0) + return ret; + data->irqs[1] = ret; data->info = device_get_match_data(dev); if (!data->info) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index 946da96d4d87af316c7646c279b449e2a15f5fc2..de5759546417c712e08727a0ab3564a47ed46916 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -184,7 +184,7 @@ static void of_gpio_try_fixup_polarity(const struct device_node *np, const char *propname; bool active_high; } gpios[] = { -#if !IS_ENABLED(CONFIG_LCD_HX8357) +#if IS_ENABLED(CONFIG_LCD_HX8357) /* * Himax LCD controllers used incorrectly named * "gpios-reset" property and also specified wrong @@ -478,7 +478,7 @@ static struct gpio_desc *of_find_gpio_rename(struct device_node *np, */ const char *compatible; } gpios[] = { -#if !IS_ENABLED(CONFIG_LCD_HX8357) +#if IS_ENABLED(CONFIG_LCD_HX8357) /* Himax LCD controllers used "gpios-reset" */ { "reset", "gpios-reset", "himax,hx8357" }, { "reset", "gpios-reset", "himax,hx8369" }, diff --git a/drivers/gpio/gpiolib-sysfs.c b/drivers/gpio/gpiolib-sysfs.c index 4dbf298bb5dda0f3fadc58843707d593a1adbc5c..6bf5332136e5a9d4bd0b4c52618fa05241ccaa00 100644 --- a/drivers/gpio/gpiolib-sysfs.c +++ b/drivers/gpio/gpiolib-sysfs.c @@ -768,25 +768,6 @@ int gpiochip_sysfs_register(struct gpio_device *gdev) return 0; } -int gpiochip_sysfs_register_all(void) -{ - struct gpio_device *gdev; - int ret; - - guard(rwsem_read)(&gpio_devices_sem); - - list_for_each_entry(gdev, &gpio_devices, list) { - if (gdev->mockdev) - continue; - - ret = gpiochip_sysfs_register(gdev); - if (ret) - return ret; - } - - return 0; -} - void gpiochip_sysfs_unregister(struct gpio_device *gdev) { struct gpio_desc *desc; @@ -811,7 
+792,9 @@ void gpiochip_sysfs_unregister(struct gpio_device *gdev) static int __init gpiolib_sysfs_init(void) { - int status; + int status; + unsigned long flags; + struct gpio_device *gdev; status = class_register(&gpio_class); if (status < 0) @@ -823,6 +806,26 @@ static int __init gpiolib_sysfs_init(void) * We run before arch_initcall() so chip->dev nodes can have * registered, and so arch_initcall() can always gpiod_export(). */ - return gpiochip_sysfs_register_all(); + spin_lock_irqsave(&gpio_lock, flags); + list_for_each_entry(gdev, &gpio_devices, list) { + if (gdev->mockdev) + continue; + + /* + * TODO we yield gpio_lock here because + * gpiochip_sysfs_register() acquires a mutex. This is unsafe + * and needs to be fixed. + * + * Also it would be nice to use gpio_device_find() here so we + * can keep gpio_chips local to gpiolib.c, but the yield of + * gpio_lock prevents us from doing this. + */ + spin_unlock_irqrestore(&gpio_lock, flags); + status = gpiochip_sysfs_register(gdev); + spin_lock_irqsave(&gpio_lock, flags); + } + spin_unlock_irqrestore(&gpio_lock, flags); + + return status; } postcore_initcall(gpiolib_sysfs_init); diff --git a/drivers/gpio/gpiolib-sysfs.h b/drivers/gpio/gpiolib-sysfs.h index ab157cec0b4bec5ae89249fedbd8374e1cd0d91e..b794b396d6a52588c93839fbba062eb160236ca4 100644 --- a/drivers/gpio/gpiolib-sysfs.h +++ b/drivers/gpio/gpiolib-sysfs.h @@ -8,7 +8,6 @@ struct gpio_device; #ifdef CONFIG_GPIO_SYSFS int gpiochip_sysfs_register(struct gpio_device *gdev); -int gpiochip_sysfs_register_all(void); void gpiochip_sysfs_unregister(struct gpio_device *gdev); #else @@ -18,11 +17,6 @@ static inline int gpiochip_sysfs_register(struct gpio_device *gdev) return 0; } -static inline int gpiochip_sysfs_register_all(void) -{ - return 0; -} - static inline void gpiochip_sysfs_unregister(struct gpio_device *gdev) { } diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 
4c93cf73a8260569de5287a4b2ae231dc54dbab6..44c8f5743a2416087b523e973967e993e8a192a1 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -2,7 +2,6 @@ #include #include -#include #include #include #include @@ -16,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -83,9 +81,7 @@ DEFINE_SPINLOCK(gpio_lock); static DEFINE_MUTEX(gpio_lookup_lock); static LIST_HEAD(gpio_lookup_list); - LIST_HEAD(gpio_devices); -DECLARE_RWSEM(gpio_devices_sem); static DEFINE_MUTEX(gpio_machine_hogs_mutex); static LIST_HEAD(gpio_machine_hogs); @@ -117,15 +113,20 @@ static inline void desc_set_label(struct gpio_desc *d, const char *label) struct gpio_desc *gpio_to_desc(unsigned gpio) { struct gpio_device *gdev; + unsigned long flags; + + spin_lock_irqsave(&gpio_lock, flags); - scoped_guard(rwsem_read, &gpio_devices_sem) { - list_for_each_entry(gdev, &gpio_devices, list) { - if (gdev->base <= gpio && - gdev->base + gdev->ngpio > gpio) - return &gdev->descs[gpio - gdev->base]; + list_for_each_entry(gdev, &gpio_devices, list) { + if (gdev->base <= gpio && + gdev->base + gdev->ngpio > gpio) { + spin_unlock_irqrestore(&gpio_lock, flags); + return &gdev->descs[gpio - gdev->base]; } } + spin_unlock_irqrestore(&gpio_lock, flags); + if (!gpio_is_valid(gpio)) pr_warn("invalid GPIO %d\n", gpio); @@ -398,21 +399,26 @@ static int gpiodev_add_to_list_unlocked(struct gpio_device *gdev) static struct gpio_desc *gpio_name_to_desc(const char * const name) { struct gpio_device *gdev; + unsigned long flags; if (!name) return NULL; - guard(rwsem_read)(&gpio_devices_sem); + spin_lock_irqsave(&gpio_lock, flags); list_for_each_entry(gdev, &gpio_devices, list) { struct gpio_desc *desc; for_each_gpio_desc(gdev->chip, desc) { - if (desc->name && !strcmp(desc->name, name)) + if (desc->name && !strcmp(desc->name, name)) { + spin_unlock_irqrestore(&gpio_lock, flags); return desc; + } } } + spin_unlock_irqrestore(&gpio_lock, flags); + return NULL; } @@ -807,6 +813,7 @@ int 
gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, struct lock_class_key *request_key) { struct gpio_device *gdev; + unsigned long flags; unsigned int i; int base = 0; int ret = 0; @@ -871,46 +878,49 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, gdev->ngpio = gc->ngpio; - scoped_guard(rwsem_write, &gpio_devices_sem) { - /* - * TODO: this allocates a Linux GPIO number base in the global - * GPIO numberspace for this chip. In the long run we want to - * get *rid* of this numberspace and use only descriptors, but - * it may be a pipe dream. It will not happen before we get rid - * of the sysfs interface anyways. - */ - base = gc->base; + spin_lock_irqsave(&gpio_lock, flags); + /* + * TODO: this allocates a Linux GPIO number base in the global + * GPIO numberspace for this chip. In the long run we want to + * get *rid* of this numberspace and use only descriptors, but + * it may be a pipe dream. It will not happen before we get rid + * of the sysfs interface anyways. + */ + base = gc->base; + if (base < 0) { + base = gpiochip_find_base_unlocked(gc->ngpio); if (base < 0) { - base = gpiochip_find_base_unlocked(gc->ngpio); - if (base < 0) { - ret = base; - base = 0; - goto err_free_label; - } - /* - * TODO: it should not be necessary to reflect the assigned - * base outside of the GPIO subsystem. Go over drivers and - * see if anyone makes use of this, else drop this and assign - * a poison instead. - */ - gc->base = base; - } else { - dev_warn(&gdev->dev, - "Static allocation of GPIO base is deprecated, use dynamic allocation.\n"); - } - gdev->base = base; - - ret = gpiodev_add_to_list_unlocked(gdev); - if (ret) { - chip_err(gc, "GPIO integer space overlap, cannot add chip\n"); + spin_unlock_irqrestore(&gpio_lock, flags); + ret = base; + base = 0; goto err_free_label; } + /* + * TODO: it should not be necessary to reflect the assigned + * base outside of the GPIO subsystem. 
Go over drivers and + * see if anyone makes use of this, else drop this and assign + * a poison instead. + */ + gc->base = base; + } else { + dev_warn(&gdev->dev, + "Static allocation of GPIO base is deprecated, use dynamic allocation.\n"); + } + gdev->base = base; - for (i = 0; i < gc->ngpio; i++) - gdev->descs[i].gdev = gdev; + ret = gpiodev_add_to_list_unlocked(gdev); + if (ret) { + spin_unlock_irqrestore(&gpio_lock, flags); + chip_err(gc, "GPIO integer space overlap, cannot add chip\n"); + goto err_free_label; } + for (i = 0; i < gc->ngpio; i++) + gdev->descs[i].gdev = gdev; + + spin_unlock_irqrestore(&gpio_lock, flags); + BLOCKING_INIT_NOTIFIER_HEAD(&gdev->line_state_notifier); BLOCKING_INIT_NOTIFIER_HEAD(&gdev->device_notifier); init_rwsem(&gdev->sem); @@ -1001,8 +1011,9 @@ err_free_gpiochip_mask: goto err_print_message; } err_remove_from_list: - scoped_guard(rwsem_write, &gpio_devices_sem) - list_del(&gdev->list); + spin_lock_irqsave(&gpio_lock, flags); + list_del(&gdev->list); + spin_unlock_irqrestore(&gpio_lock, flags); err_free_label: kfree_const(gdev->label); err_free_descs: @@ -1065,7 +1076,7 @@ void gpiochip_remove(struct gpio_chip *gc) dev_crit(&gdev->dev, "REMOVING GPIOCHIP WITH GPIOS STILL REQUESTED\n"); - scoped_guard(rwsem_write, &gpio_devices_sem) + scoped_guard(spinlock_irqsave, &gpio_lock) list_del(&gdev->list); /* @@ -1114,7 +1125,7 @@ struct gpio_device *gpio_device_find(void *data, */ might_sleep(); - guard(rwsem_read)(&gpio_devices_sem); + guard(spinlock_irqsave)(&gpio_lock); list_for_each_entry(gdev, &gpio_devices, list) { if (gdev->chip && match(gdev->chip, data)) @@ -4725,33 +4736,35 @@ static void gpiolib_dbg_show(struct seq_file *s, struct gpio_device *gdev) static void *gpiolib_seq_start(struct seq_file *s, loff_t *pos) { + unsigned long flags; struct gpio_device *gdev = NULL; loff_t index = *pos; s->private = ""; - guard(rwsem_read)(&gpio_devices_sem); - - list_for_each_entry(gdev, &gpio_devices, list) { - if (index-- == 0) + 
spin_lock_irqsave(&gpio_lock, flags); + list_for_each_entry(gdev, &gpio_devices, list) + if (index-- == 0) { + spin_unlock_irqrestore(&gpio_lock, flags); return gdev; - } + } + spin_unlock_irqrestore(&gpio_lock, flags); return NULL; } static void *gpiolib_seq_next(struct seq_file *s, void *v, loff_t *pos) { + unsigned long flags; struct gpio_device *gdev = v; void *ret = NULL; - scoped_guard(rwsem_read, &gpio_devices_sem) { - if (list_is_last(&gdev->list, &gpio_devices)) - ret = NULL; - else - ret = list_first_entry(&gdev->list, struct gpio_device, - list); - } + spin_lock_irqsave(&gpio_lock, flags); + if (list_is_last(&gdev->list, &gpio_devices)) + ret = NULL; + else + ret = list_first_entry(&gdev->list, struct gpio_device, list); + spin_unlock_irqrestore(&gpio_lock, flags); s->private = "\n"; ++*pos; diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h index 97df54abf57ac0f2a740d72bf37965b299d87927..a4a2520b5f31cced763696970a452ab1fff0e4bf 100644 --- a/drivers/gpio/gpiolib.h +++ b/drivers/gpio/gpiolib.h @@ -15,7 +15,6 @@ #include /* for enum gpiod_flags */ #include #include -#include #include #include @@ -137,7 +136,6 @@ int gpiod_set_transitory(struct gpio_desc *desc, bool transitory); extern spinlock_t gpio_lock; extern struct list_head gpio_devices; -extern struct rw_semaphore gpio_devices_sem; void gpiod_line_state_notify(struct gpio_desc *desc, unsigned long action); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 9da14436a3738f8b950db3aa44a71b47c6e4952c..3d8a48f46b015613dc44517ebd20d5250df5a3b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -254,8 +254,6 @@ extern int amdgpu_agp; extern int amdgpu_wbrf; -extern int fw_bo_location; - #define AMDGPU_VM_MAX_NUM_CTX 4096 #define AMDGPU_SG_THRESHOLD (256*1024*1024) #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 @@ -1146,6 +1144,7 @@ struct amdgpu_device { bool debug_vm; bool debug_largebar; bool 
debug_disable_soft_recovery; + bool debug_use_vram_fw_buf; }; static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 067690ba7bffd4192817fe3acf9d46a19f1f274c..77e2636602887034c188ec695591d20e5b087b60 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -138,6 +138,9 @@ static void amdgpu_amdkfd_reset_work(struct work_struct *work) amdgpu_device_gpu_recover(adev, NULL, &reset_context); } +static const struct drm_client_funcs kfd_client_funcs = { + .unregister = drm_client_release, +}; void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) { int i; @@ -161,7 +164,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) .enable_mes = adev->enable_mes, }; - ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", NULL); + ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", &kfd_client_funcs); if (ret) { dev_err(adev->dev, "Failed to init DRM client: %d\n", ret); return; @@ -695,10 +698,8 @@ err: void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle) { enum amd_powergating_state state = idle ? AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE; - /* Temporary workaround to fix issues observed in some - * compute applications when GFXOFF is enabled on GFX11. - */ - if (IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 11) { + if (IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 11 && + ((adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK) <= 64)) { pr_debug("GFXOFF is %s\n", idle ? 
"enabled" : "disabled"); amdgpu_gfx_off_ctrl(adev, idle); } else if ((IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 9) && diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index cf6ed5fce291f946854d329fa91e0fb6eedbc61a..f262b9d89541a8a971a394b5f0da0f6a1368ba65 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -311,7 +311,7 @@ void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem); int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo); int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info, - struct dma_fence **ef); + struct dma_fence __rcu **ef); int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev, struct kfd_vm_fault_info *info); int amdgpu_amdkfd_gpuvm_import_dmabuf_fd(struct amdgpu_device *adev, int fd, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index d17b2452cb1f69df276dd95518cf0ca340539237..f183d7faeeece16cfc7c211f5a6a0232dce37c36 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -2802,7 +2802,7 @@ unlock_out: put_task_struct(usertask); } -static void replace_eviction_fence(struct dma_fence **ef, +static void replace_eviction_fence(struct dma_fence __rcu **ef, struct dma_fence *new_ef) { struct dma_fence *old_ef = rcu_replace_pointer(*ef, new_ef, true @@ -2837,7 +2837,7 @@ static void replace_eviction_fence(struct dma_fence **ef, * 7. Add fence to all PD and PT BOs. * 8. 
Unreserve all BOs */ -int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) +int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu **ef) { struct amdkfd_process_info *process_info = info; struct amdgpu_vm *peer_vm; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 5bb444bb36cece19b00fe27f0923c42b1bc1c83f..b158d27d0a71cbbafb55f0d58657c1ec178fa6c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1544,6 +1544,7 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev) return true; fw_ver = *((uint32_t *)adev->pm.fw->data + 69); + release_firmware(adev->pm.fw); if (fw_ver < 0x00160e00) return true; } @@ -5245,7 +5246,6 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, struct amdgpu_device *tmp_adev = NULL; bool need_full_reset, skip_hw_reset, vram_lost = false; int r = 0; - bool gpu_reset_for_dev_remove = 0; /* Try reset handler method first */ tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, @@ -5265,10 +5265,6 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags); - gpu_reset_for_dev_remove = - test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) && - test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); - /* * ASIC reset has to be done on all XGMI hive nodes ASAP * to allow proper links negotiation in FW (within 1 sec) @@ -5311,18 +5307,6 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, amdgpu_ras_intr_cleared(); } - /* Since the mode1 reset affects base ip blocks, the - * phase1 ip blocks need to be resumed. Otherwise there - * will be a BIOS signature error and the psp bootloader - * can't load kdb on the next amdgpu install. 
- */ - if (gpu_reset_for_dev_remove) { - list_for_each_entry(tmp_adev, device_list_handle, reset_list) - amdgpu_device_ip_resume_phase1(tmp_adev); - - goto end; - } - list_for_each_entry(tmp_adev, device_list_handle, reset_list) { if (need_full_reset) { /* post card */ @@ -5559,11 +5543,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, int i, r = 0; bool need_emergency_restart = false; bool audio_suspended = false; - bool gpu_reset_for_dev_remove = false; - - gpu_reset_for_dev_remove = - test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) && - test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); /* * Special case: RAS triggered and full reset isn't supported @@ -5601,7 +5580,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) { list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { list_add_tail(&tmp_adev->reset_list, &device_list); - if (gpu_reset_for_dev_remove && adev->shutdown) + if (adev->shutdown) tmp_adev->shutdown = true; } if (!list_is_first(&adev->reset_list, &device_list)) @@ -5686,10 +5665,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, retry: /* Rest of adevs pre asic reset from XGMI hive. */ list_for_each_entry(tmp_adev, device_list_handle, reset_list) { - if (gpu_reset_for_dev_remove) { - /* Workaroud for ASICs need to disable SMC first */ - amdgpu_device_smu_fini_early(tmp_adev); - } r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context); /*TODO Should we stop ?*/ if (r) { @@ -5721,9 +5696,6 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. 
*/ r = amdgpu_do_asic_reset(device_list_handle, reset_context); if (r && r == -EAGAIN) goto retry; - - if (!r && gpu_reset_for_dev_remove) - goto recover_end; } skip_hw_reset: @@ -5779,7 +5751,6 @@ skip_sched_resume: amdgpu_ras_set_error_query_ready(tmp_adev, true); } -recover_end: tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, reset_list); amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 0431eafa86b5324f4d63cc6060cea30baa03088b..c7d60dd0fb975d47d749300c79f976da15892736 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -1963,8 +1963,6 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); break; case IP_VERSION(9, 4, 3): - if (!amdgpu_exp_hw_support) - return -EINVAL; amdgpu_device_ip_block_add(adev, &gfx_v9_4_3_ip_block); break; case IP_VERSION(10, 1, 10): diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 852cec98ff262359fb823ccb26f1b9977da8dec9..cc69005f5b46e7b9f06d65db13287a617cc384e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -128,6 +128,7 @@ enum AMDGPU_DEBUG_MASK { AMDGPU_DEBUG_VM = BIT(0), AMDGPU_DEBUG_LARGEBAR = BIT(1), AMDGPU_DEBUG_DISABLE_GPU_SOFT_RECOVERY = BIT(2), + AMDGPU_DEBUG_USE_VRAM_FW_BUF = BIT(3), }; unsigned int amdgpu_vram_limit = UINT_MAX; @@ -210,7 +211,6 @@ int amdgpu_seamless = -1; /* auto */ uint amdgpu_debug_mask; int amdgpu_agp = -1; /* auto */ int amdgpu_wbrf = -1; -int fw_bo_location = -1; static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work); @@ -990,10 +990,6 @@ MODULE_PARM_DESC(wbrf, "Enable Wifi RFI interference mitigation (0 = disabled, 1 = enabled, -1 = auto(default)"); module_param_named(wbrf, amdgpu_wbrf, int, 0444); 
-MODULE_PARM_DESC(fw_bo_location, - "location to put firmware bo for frontdoor loading (-1 = auto (default), 0 = on ram, 1 = on vram"); -module_param(fw_bo_location, int, 0644); - /* These devices are not supported by amdgpu. * They are supported by the mach64, r128, radeon drivers */ @@ -2122,6 +2118,11 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev) pr_info("debug: soft reset for GPU recovery disabled\n"); adev->debug_disable_soft_recovery = true; } + + if (amdgpu_debug_mask & AMDGPU_DEBUG_USE_VRAM_FW_BUF) { + pr_info("debug: place fw in vram for frontdoor loading\n"); + adev->debug_use_vram_fw_buf = true; + } } static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags) @@ -2233,6 +2234,8 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, pci_set_drvdata(pdev, ddev); + amdgpu_init_debug_options(adev); + ret = amdgpu_driver_load_kms(adev, flags); if (ret) goto err_pci; @@ -2313,8 +2316,6 @@ retry_init: amdgpu_get_secondary_funcs(adev); } - amdgpu_init_debug_options(adev); - return 0; err_pci: @@ -2336,38 +2337,6 @@ amdgpu_pci_remove(struct pci_dev *pdev) pm_runtime_forbid(dev->dev); } - if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 2) && - !amdgpu_sriov_vf(adev)) { - bool need_to_reset_gpu = false; - - if (adev->gmc.xgmi.num_physical_nodes > 1) { - struct amdgpu_hive_info *hive; - - hive = amdgpu_get_xgmi_hive(adev); - if (hive->device_remove_count == 0) - need_to_reset_gpu = true; - hive->device_remove_count++; - amdgpu_put_xgmi_hive(hive); - } else { - need_to_reset_gpu = true; - } - - /* Workaround for ASICs need to reset SMU. - * Called only when the first device is removed. 
- */ - if (need_to_reset_gpu) { - struct amdgpu_reset_context reset_context; - - adev->shutdown = true; - memset(&reset_context, 0, sizeof(reset_context)); - reset_context.method = AMD_RESET_METHOD_NONE; - reset_context.reset_req_dev = adev; - set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); - set_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context.flags); - amdgpu_device_gpu_recover(adev, NULL, &reset_context); - } - } - amdgpu_driver_unload_kms(dev); /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index d2f273d77e59557ba5185cbfa36e243788d3d86e..55784a9f26c4c83b17008a766130c234df8ecbaf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -1045,21 +1045,28 @@ int amdgpu_gmc_vram_checking(struct amdgpu_device *adev) * seconds, so here, we just pick up three parts for emulation. */ ret = memcmp(vram_ptr, cptr, 10); - if (ret) - return ret; + if (ret) { + ret = -EIO; + goto release_buffer; + } ret = memcmp(vram_ptr + (size / 2), cptr, 10); - if (ret) - return ret; + if (ret) { + ret = -EIO; + goto release_buffer; + } ret = memcmp(vram_ptr + size - 10, cptr, 10); - if (ret) - return ret; + if (ret) { + ret = -EIO; + goto release_buffer; + } +release_buffer: amdgpu_bo_free_kernel(&vram_bo, &vram_gpu, &vram_ptr); - return 0; + return ret; } static ssize_t current_memory_partition_show( diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c index 82608df4339648a930528560d9e1a2ff5af3c459..d79cb13e1aa835db4028173957d24f3132049521 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c @@ -175,7 +175,6 @@ struct amdgpu_i2c_chan *amdgpu_i2c_create(struct drm_device *dev, i2c->rec = *rec; i2c->adapter.owner = THIS_MODULE; - i2c->adapter.class = I2C_CLASS_DDC; i2c->adapter.dev.parent = dev->dev; i2c->dev = dev; i2c_set_adapdata(&i2c->adapter, i2c); diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index b5ebafd4a3adf82e37b29f9df84cbf6541955441..bf4f48fe438d1b5936852145c8b4c1059446381c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -1105,7 +1105,12 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_AVG_POWER, (void *)&ui32, &ui32_size)) { - return -EINVAL; + /* fall back to input power for backwards compat */ + if (amdgpu_dpm_read_sensor(adev, + AMDGPU_PP_SENSOR_GPU_INPUT_POWER, + (void *)&ui32, &ui32_size)) { + return -EINVAL; + } } ui32 >>= 8; break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 2addbdf88394b8287b0ea9fb87297b3435e826fb..0328616473f80af861cd4a1176afc0221eee7db9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -466,7 +466,7 @@ static int psp_sw_init(void *handle) } ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, - (amdgpu_sriov_vf(adev) || fw_bo_location == 1) ? + (amdgpu_sriov_vf(adev) || adev->debug_use_vram_fw_buf) ? 
AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, &psp->fw_pri_bo, &psp->fw_pri_mc_addr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index fc42fb6ee1914b82e0bec3897cf92594f587423f..31823a30dea217b5af3a8a36624a01fab70b48a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -305,11 +305,13 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, return -EINVAL; data->head.block = block_id; - /* only ue and ce errors are supported */ + /* only ue, ce and poison errors are supported */ if (!memcmp("ue", err, 2)) data->head.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; else if (!memcmp("ce", err, 2)) data->head.type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE; + else if (!memcmp("poison", err, 6)) + data->head.type = AMDGPU_RAS_ERROR__POISON; else return -EINVAL; @@ -431,9 +433,10 @@ static void amdgpu_ras_instance_mask_check(struct amdgpu_device *adev, * The block is one of: umc, sdma, gfx, etc. * see ras_block_string[] for details * - * The error type is one of: ue, ce, where, + * The error type is one of: ue, ce and poison where, * ue is multi-uncorrectable * ce is single-correctable + * poison is poison * * The sub-block is a the sub-block index, pass 0 if there is no sub-block. * The address and value are hexadecimal numbers, leading 0x is optional. 
@@ -1067,8 +1070,7 @@ static void amdgpu_ras_error_print_error_data(struct amdgpu_device *adev, mcm_info = &err_info->mcm_info; if (err_info->ce_count) { dev_info(adev->dev, "socket: %d, die: %d, " - "%lld new correctable hardware errors detected in %s block, " - "no user action is needed\n", + "%lld new correctable hardware errors detected in %s block\n", mcm_info->socket_id, mcm_info->die_id, err_info->ce_count, @@ -1080,8 +1082,7 @@ static void amdgpu_ras_error_print_error_data(struct amdgpu_device *adev, err_info = &err_node->err_info; mcm_info = &err_info->mcm_info; dev_info(adev->dev, "socket: %d, die: %d, " - "%lld correctable hardware errors detected in total in %s block, " - "no user action is needed\n", + "%lld correctable hardware errors detected in total in %s block\n", mcm_info->socket_id, mcm_info->die_id, err_info->ce_count, blk_name); } } @@ -1108,16 +1109,14 @@ static void amdgpu_ras_error_generate_report(struct amdgpu_device *adev, adev->smuio.funcs->get_die_id) { dev_info(adev->dev, "socket: %d, die: %d " "%ld correctable hardware errors " - "detected in %s block, no user " - "action is needed.\n", + "detected in %s block\n", adev->smuio.funcs->get_socket_id(adev), adev->smuio.funcs->get_die_id(adev), ras_mgr->err_data.ce_count, blk_name); } else { dev_info(adev->dev, "%ld correctable hardware errors " - "detected in %s block, no user " - "action is needed.\n", + "detected in %s block\n", ras_mgr->err_data.ce_count, blk_name); } @@ -1920,7 +1919,7 @@ static void amdgpu_ras_interrupt_poison_creation_handler(struct ras_manager *obj struct amdgpu_iv_entry *entry) { dev_info(obj->adev->dev, - "Poison is created, no user action is needed.\n"); + "Poison is created\n"); } static void amdgpu_ras_interrupt_umc_handler(struct ras_manager *obj, @@ -2920,6 +2919,11 @@ int amdgpu_ras_init(struct amdgpu_device *adev) amdgpu_ras_query_poison_mode(adev); + /* Packed socket_id to ras feature mask bits[31:29] */ + if (adev->smuio.funcs && + 
adev->smuio.funcs->get_socket_id) + con->features |= ((adev->smuio.funcs->get_socket_id(adev)) << 29); + /* Get RAS schema for particular SOC */ con->schema = amdgpu_get_ras_schema(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h index b0335a1c5e90cb8f000fe1989bfb20dfbbd53c58..19899f6b9b2b419a0fdf2ed84c71f0278963f511 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h @@ -32,7 +32,6 @@ enum AMDGPU_RESET_FLAGS { AMDGPU_NEED_FULL_RESET = 0, AMDGPU_SKIP_HW_RESET = 1, - AMDGPU_RESET_FOR_DEVICE_REMOVE = 2, }; struct amdgpu_reset_context { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index d334e42fe0ebe648e3efafb3970f650b615f716d..3e12763e477aa45724d0c16a1b514a5a299a76a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -1062,7 +1062,7 @@ int amdgpu_ucode_create_bo(struct amdgpu_device *adev) { if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) { amdgpu_bo_create_kernel(adev, adev->firmware.fw_size, PAGE_SIZE, - (amdgpu_sriov_vf(adev) || fw_bo_location == 1) ? + (amdgpu_sriov_vf(adev) || adev->debug_use_vram_fw_buf) ? 
AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, &adev->firmware.fw_buf, &adev->firmware.fw_buf_mc, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index b6cd565562ad8d9a99270757fc2b37352600d2f3..4740dd65b99d6ccc107e5d63aba0f0d67d02d718 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -116,7 +116,7 @@ struct amdgpu_mem_stats; #define AMDGPU_VM_FAULT_STOP_FIRST 1 #define AMDGPU_VM_FAULT_STOP_ALWAYS 2 -/* Reserve 4MB VRAM for page tables */ +/* How much VRAM be reserved for page tables */ #define AMDGPU_VM_RESERVED_VRAM (8ULL << 20) /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c index 6f149b54d4d3970c5fe0a8255f8f7a080433381a..b9a15d51eb5c30e554d4e4f7c1397e3ce51996d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c @@ -59,11 +59,8 @@ static inline uint16_t complete_integer_division_u16( static uint16_t vpe_u1_8_from_fraction(uint16_t numerator, uint16_t denominator) { - bool arg1_negative = numerator < 0; - bool arg2_negative = denominator < 0; - - uint16_t arg1_value = (uint16_t)(arg1_negative ? -numerator : numerator); - uint16_t arg2_value = (uint16_t)(arg2_negative ? 
-denominator : denominator); + u16 arg1_value = numerator; + u16 arg2_value = denominator; uint16_t remainder; @@ -100,9 +97,6 @@ static uint16_t vpe_u1_8_from_fraction(uint16_t numerator, uint16_t denominator) res_value += summand; } - if (arg1_negative ^ arg2_negative) - res_value = -res_value; - return res_value; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h index 6cab882e8061e80f33bca5eb1a7b59d8cf0a687f..1592c63b3099b982d0b9bdda596919da8ec14f5f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h @@ -43,7 +43,6 @@ struct amdgpu_hive_info { } pstate; struct amdgpu_reset_domain *reset_domain; - uint32_t device_remove_count; atomic_t ras_recovery; }; diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c index f0737fb3a999e03a44eb3c08f6e0099e3326c929..d1bba9c64e16d808fbaafd4e01d8764cb77b1a86 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c @@ -30,6 +30,8 @@ #define regATHUB_MISC_CNTL_V3_0_1 0x00d7 #define regATHUB_MISC_CNTL_V3_0_1_BASE_IDX 0 +#define regATHUB_MISC_CNTL_V3_3_0 0x00d8 +#define regATHUB_MISC_CNTL_V3_3_0_BASE_IDX 0 static uint32_t athub_v3_0_get_cg_cntl(struct amdgpu_device *adev) @@ -40,6 +42,9 @@ static uint32_t athub_v3_0_get_cg_cntl(struct amdgpu_device *adev) case IP_VERSION(3, 0, 1): data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1); break; + case IP_VERSION(3, 3, 0): + data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_3_0); + break; default: data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL); break; @@ -53,6 +58,9 @@ static void athub_v3_0_set_cg_cntl(struct amdgpu_device *adev, uint32_t data) case IP_VERSION(3, 0, 1): WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1, data); break; + case IP_VERSION(3, 3, 0): + WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_3_0, data); + break; default: WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data); break; diff 
--git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 73f6d7e72c737537f17264746b061a936b4960e5..d63cab294883b8b44caa908d5bafaeaf19750ef6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3996,16 +3996,13 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) if (!amdgpu_sriov_vf(adev)) { snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix); - err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name); - /* don't check this. There are apparently firmwares in the wild with - * incorrect size in the header - */ - if (err == -ENODEV) - goto out; + err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); if (err) - dev_dbg(adev->dev, - "gfx10: amdgpu_ucode_request() failed \"%s\"\n", - fw_name); + goto out; + + /* don't validate this firmware. There are apparently firmwares + * in the wild with incorrect size in the header + */ rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; version_major = le16_to_cpu(rlc_hdr->header.header_version_major); version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 2fbcd9765980d01a79b9b295fc28d82a909c69a9..0ea0866c261f84e24e8494755387b3d22482a0a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -115,7 +115,7 @@ static const struct soc15_reg_golden golden_settings_gc_11_5_0[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3), SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL, 0xffffffff, 0xf37fff3f), SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xfffffffb, 0x00f40188), - SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL4, 0xf0ffffff, 0x8000b007), + SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL4, 0xf0ffffff, 0x80009007), SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf1ffffff, 0x00880007), 
SOC15_REG_GOLDEN_VALUE(GC, 0, regPC_CONFIG_CNTL_1, 0xffffffff, 0x00010000), SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000), @@ -6383,6 +6383,9 @@ static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev, mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { + bitmap = i * adev->gfx.config.max_sh_per_se + j; + if (!((gfx_v11_0_get_sa_active_bitmap(adev) >> bitmap) & 1)) + continue; mask = 1; counter = 0; gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c index 95d06da544e2a54ebe6fb10ad1309a0c8074814f..49aecdcee006959491e4dba90058faf35e205fdb 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c @@ -456,10 +456,12 @@ static void gfxhub_v1_2_xcc_gart_disable(struct amdgpu_device *adev, WREG32_SOC15_RLC(GC, GET_INST(GC, j), regMC_VM_MX_L1_TLB_CNTL, tmp); /* Setup L2 cache */ - tmp = RREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0); - WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL, tmp); - WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL3, 0); + if (!amdgpu_sriov_vf(adev)) { + tmp = RREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0); + WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL, tmp); + WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL3, 0); + } } } diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index 6d24c84924cb5dd646ddaa69bb91a3193493b5f4..19986ff6a48d7e773dcc892b9dccd585fe69c306 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -401,8 +401,7 @@ static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device if (err_data.ce_count) dev_info(adev->dev, "%ld correctable hardware 
" - "errors detected in %s block, " - "no user action is needed.\n", + "errors detected in %s block\n", obj->err_data.ce_count, get_ras_block_str(adev->nbio.ras_if)); diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c index 25a3da83e0fb97e5949221d17e3fcd63062dd29c..e90f33780803458c32843f2599c07e4f598ca659 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c @@ -597,8 +597,7 @@ static void nbio_v7_9_handle_ras_controller_intr_no_bifring(struct amdgpu_device if (err_data.ce_count) dev_info(adev->dev, "%ld correctable hardware " - "errors detected in %s block, " - "no user action is needed.\n", + "errors detected in %s block\n", obj->err_data.ce_count, get_ras_block_str(adev->nbio.ras_if)); diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c index 530549314ce46c541a192305d1a7e1db17f11ebf..a3ee3c4c650febb4ca6fa61c9b7b5b51f16ce60c 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c @@ -64,7 +64,7 @@ static void umc_v6_7_query_error_status_helper(struct amdgpu_device *adev, uint64_t reg_value; if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1) - dev_info(adev->dev, "Deferred error, no user action is needed.\n"); + dev_info(adev->dev, "Deferred error\n"); if (mc_umc_status) dev_info(adev->dev, "MCA STATUS 0x%llx, umc_reg_offset 0x%x\n", mc_umc_status, umc_reg_offset); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index d630100b9e91b8588dc2e9611e279fba909e54c5..f856901055d34e605cd4ec51fbdfc3be18e2abeb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -1026,7 +1026,7 @@ int kgd2kfd_init_zone_device(struct amdgpu_device *adev) } else { res = devm_request_free_mem_region(adev->dev, &iomem_resource, size); if (IS_ERR(res)) - return -ENOMEM; + return PTR_ERR(res); pgmap->range.start 
= res->start; pgmap->range.end = res->end; pgmap->type = MEMORY_DEVICE_PRIVATE; @@ -1042,10 +1042,10 @@ int kgd2kfd_init_zone_device(struct amdgpu_device *adev) r = devm_memremap_pages(adev->dev, pgmap); if (IS_ERR(r)) { pr_err("failed to register HMM device memory\n"); - /* Disable SVM support capability */ - pgmap->type = 0; if (pgmap->type == MEMORY_DEVICE_PRIVATE) devm_release_mem_region(adev->dev, res->start, resource_size(res)); + /* Disable SVM support capability */ + pgmap->type = 0; return PTR_ERR(r); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 745024b313401261a73900360c3d50f62d8440d8..17fbedbf3651388edfcd0109a22d0fe9dfcd331f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -917,7 +917,7 @@ struct kfd_process { * fence will be triggered during eviction and new one will be created * during restore */ - struct dma_fence *ef; + struct dma_fence __rcu *ef; /* Work items for evicting and restoring BOs */ struct delayed_work eviction_work; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 71df51fcc1b0d80f42899a0e15ae454b3f03f2bc..717a60d7a4ea953b8dfc369b09d855ad74b49659 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1110,6 +1110,7 @@ static void kfd_process_wq_release(struct work_struct *work) { struct kfd_process *p = container_of(work, struct kfd_process, release_work); + struct dma_fence *ef; kfd_process_dequeue_from_all_devices(p); pqm_uninit(&p->pqm); @@ -1118,7 +1119,9 @@ static void kfd_process_wq_release(struct work_struct *work) * destroyed. This allows any BOs to be freed without * triggering pointless evictions or waiting for fences. 
*/ - dma_fence_signal(p->ef); + synchronize_rcu(); + ef = rcu_access_pointer(p->ef); + dma_fence_signal(ef); kfd_process_remove_sysfs(p); @@ -1127,7 +1130,7 @@ static void kfd_process_wq_release(struct work_struct *work) svm_range_list_fini(p); kfd_process_destroy_pdds(p); - dma_fence_put(p->ef); + dma_fence_put(ef); kfd_event_free_process(p); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index ac84c4a2ca072a7629f1f933214bd2d17a230ac7..c50a0dc9c9c072f5692d003bce90aaaf13615c5d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -404,14 +404,9 @@ static void svm_range_bo_release(struct kref *kref) spin_lock(&svm_bo->list_lock); } spin_unlock(&svm_bo->list_lock); - if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base)) { - /* We're not in the eviction worker. - * Signal the fence and synchronize with any - * pending eviction work. - */ + if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base)) + /* We're not in the eviction worker. Signal the fence. */ dma_fence_signal(&svm_bo->eviction_fence->base); - cancel_work_sync(&svm_bo->eviction_work); - } dma_fence_put(&svm_bo->eviction_fence->base); amdgpu_bo_unref(&svm_bo->bo); kfree(svm_bo); @@ -2345,8 +2340,10 @@ retry: mutex_unlock(&svms->lock); mmap_write_unlock(mm); - /* Pairs with mmget in svm_range_add_list_work */ - mmput(mm); + /* Pairs with mmget in svm_range_add_list_work. 
If dropping the + * last mm refcount, schedule release work to avoid circular locking + */ + mmput_async(mm); spin_lock(&svms->deferred_list_lock); } @@ -2657,6 +2654,7 @@ svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr, { struct vm_area_struct *vma; struct interval_tree_node *node; + struct rb_node *rb_node; unsigned long start_limit, end_limit; vma = vma_lookup(p->mm, addr << PAGE_SHIFT); @@ -2676,16 +2674,15 @@ svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr, if (node) { end_limit = min(end_limit, node->start); /* Last range that ends before the fault address */ - node = container_of(rb_prev(&node->rb), - struct interval_tree_node, rb); + rb_node = rb_prev(&node->rb); } else { /* Last range must end before addr because * there was no range after addr */ - node = container_of(rb_last(&p->svms.objects.rb_root), - struct interval_tree_node, rb); + rb_node = rb_last(&p->svms.objects.rb_root); } - if (node) { + if (rb_node) { + node = container_of(rb_node, struct interval_tree_node, rb); if (node->last >= addr) { WARN(1, "Overlap with prev node and page fault addr\n"); return -EFAULT; @@ -3432,13 +3429,14 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange, int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence) { - if (!fence) - return -EINVAL; - - if (dma_fence_is_signaled(&fence->base)) - return 0; - - if (fence->svm_bo) { + /* Dereferencing fence->svm_bo is safe here because the fence hasn't + * signaled yet and we're under the protection of the fence->lock. + * After the fence is signaled in svm_range_bo_release, we cannot get + * here any more. + * + * Reference is dropped in svm_range_evict_svm_bo_worker. 
+ */ + if (svm_bo_ref_unless_zero(fence->svm_bo)) { WRITE_ONCE(fence->svm_bo->evicting, 1); schedule_work(&fence->svm_bo->eviction_work); } @@ -3453,8 +3451,6 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work) int r = 0; svm_bo = container_of(work, struct svm_range_bo, eviction_work); - if (!svm_bo_ref_unless_zero(svm_bo)) - return; /* svm_bo was freed while eviction was pending */ if (mmget_not_zero(svm_bo->eviction_fence->mm)) { mm = svm_bo->eviction_fence->mm; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index f6575d7dee97150146c6112c2be4fea52ddbe20a..d4f525b66a09055909e163b815beee357f28d19d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -7615,7 +7615,6 @@ create_i2c(struct ddc_service *ddc_service, if (!i2c) return NULL; i2c->base.owner = THIS_MODULE; - i2c->base.class = I2C_CLASS_DDC; i2c->base.dev.parent = &adev->pdev->dev; i2c->base.algo = &amdgpu_dm_i2c_algo; snprintf(i2c->base.name, sizeof(i2c->base.name), "AMDGPU DM i2c hw bus %d", link_index); @@ -9293,10 +9292,10 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) if (!new_con_state->writeback_job) continue; - new_crtc_state = NULL; + new_crtc_state = drm_atomic_get_new_crtc_state(state, &acrtc->base); - if (acrtc) - new_crtc_state = drm_atomic_get_new_crtc_state(state, &acrtc->base); + if (!new_crtc_state) + continue; if (acrtc->wb_enabled) continue; @@ -10753,7 +10752,7 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, DRM_DEBUG_DRIVER("drm_dp_mst_atomic_check() failed\n"); goto fail; } - status = dc_validate_global_state(dc, dm_state->context, false); + status = dc_validate_global_state(dc, dm_state->context, true); if (status != DC_OK) { DRM_DEBUG_DRIVER("DC global validation failure: %s (%d)", dc_status_to_str(status), status); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 9b527bffe11a1f55e1a63d94c937c829e0d6f820..c87b64e464ed5c8e13c6fb823b8bf9ca0bcfe0fc 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -1239,7 +1239,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, if (has_crtc_cm_degamma && ret != -EINVAL) { drm_dbg_kms(crtc->base.crtc->dev, "doesn't support plane and CRTC degamma at the same time\n"); - return -EINVAL; + return -EINVAL; } /* If we are here, it means we don't have plane degamma settings, check diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index eaf8d9f482446d5ea9728ec17657189e25917ae8..85b7f58a7f35a478f551ec097b1613b504ced535 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -979,6 +979,11 @@ int dm_helper_dmub_aux_transfer_sync( struct aux_payload *payload, enum aux_return_code_type *operation_result) { + if (!link->hpd_status) { + *operation_result = AUX_RET_ERROR_HPD_DISCON; + return -1; + } + return amdgpu_dm_process_dmub_aux_transfer_sync(ctx, link->link_index, payload, operation_result); } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c index 7575282563267c6cd33872debc3f59b7819d35f5..a84f1e376dee45f7fbefea37053c0df57074789a 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c @@ -87,6 +87,20 @@ static const struct IP_BASE CLK_BASE = { { { { 0x00016C00, 0x02401800, 0, 0, 0, #define CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L #define CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L +#define regCLK1_CLK2_BYPASS_CNTL 0x029c +#define regCLK1_CLK2_BYPASS_CNTL_BASE_IDX 0 + +#define 
CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL__SHIFT 0x0 +#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV__SHIFT 0x10 +#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK 0x00000007L +#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV_MASK 0x000F0000L + +#define regCLK6_0_CLK6_spll_field_8 0x464b +#define regCLK6_0_CLK6_spll_field_8_BASE_IDX 0 + +#define CLK6_0_CLK6_spll_field_8__spll_ssc_en__SHIFT 0xd +#define CLK6_0_CLK6_spll_field_8__spll_ssc_en_MASK 0x00002000L + #define REG(reg_name) \ (CLK_BASE.instance[0].segment[reg ## reg_name ## _BASE_IDX] + reg ## reg_name) @@ -131,35 +145,63 @@ static int dcn314_get_active_display_cnt_wa( return display_count; } -static void dcn314_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context, bool disable) +static void dcn314_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context, + bool safe_to_lower, bool disable) { struct dc *dc = clk_mgr_base->ctx->dc; int i; for (i = 0; i < dc->res_pool->pipe_count; ++i) { - struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + struct pipe_ctx *pipe = safe_to_lower + ? 
&context->res_ctx.pipe_ctx[i] + : &dc->current_state->res_ctx.pipe_ctx[i]; if (pipe->top_pipe || pipe->prev_odm_pipe) continue; if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal))) { - struct stream_encoder *stream_enc = pipe->stream_res.stream_enc; - if (disable) { - if (stream_enc && stream_enc->funcs->disable_fifo) - pipe->stream_res.stream_enc->funcs->disable_fifo(stream_enc); + if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->immediate_disable_crtc) + pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg); - pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg); reset_sync_context_for_pipe(dc, context, i); } else { pipe->stream_res.tg->funcs->enable_crtc(pipe->stream_res.tg); - - if (stream_enc && stream_enc->funcs->enable_fifo) - pipe->stream_res.stream_enc->funcs->enable_fifo(stream_enc); } } } } +bool dcn314_is_spll_ssc_enabled(struct clk_mgr *clk_mgr_base) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + uint32_t ssc_enable; + + REG_GET(CLK6_0_CLK6_spll_field_8, spll_ssc_en, &ssc_enable); + + return ssc_enable == 1; +} + +void dcn314_init_clocks(struct clk_mgr *clk_mgr) +{ + struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr); + uint32_t ref_dtbclk = clk_mgr->clks.ref_dtbclk_khz; + + memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks)); + // Assumption is that boot state always supports pstate + clk_mgr->clks.ref_dtbclk_khz = ref_dtbclk; // restore ref_dtbclk + clk_mgr->clks.p_state_change_support = true; + clk_mgr->clks.prev_p_state_change_support = true; + clk_mgr->clks.pwr_state = DCN_PWR_STATE_UNKNOWN; + clk_mgr->clks.zstate_support = DCN_ZSTATE_SUPPORT_UNKNOWN; + + // to adjust dp_dto reference clock if ssc is enable otherwise to apply dprefclk + if (dcn314_is_spll_ssc_enabled(clk_mgr)) + clk_mgr->dp_dto_source_clock_in_khz = + dce_adjust_dp_ref_freq_for_ss(clk_mgr_int, clk_mgr->dprefclk_khz); + else + 
clk_mgr->dp_dto_source_clock_in_khz = clk_mgr->dprefclk_khz; +} + void dcn314_update_clocks(struct clk_mgr *clk_mgr_base, struct dc_state *context, bool safe_to_lower) @@ -252,11 +294,11 @@ void dcn314_update_clocks(struct clk_mgr *clk_mgr_base, } if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) { - dcn314_disable_otg_wa(clk_mgr_base, context, true); + dcn314_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true); clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz; dcn314_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz); - dcn314_disable_otg_wa(clk_mgr_base, context, false); + dcn314_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false); update_dispclk = true; } @@ -436,6 +478,11 @@ static DpmClocks314_t dummy_clocks; static struct dcn314_watermarks dummy_wms = { 0 }; +static struct dcn314_ss_info_table ss_info_table = { + .ss_divider = 1000, + .ss_percentage = {0, 0, 375, 375, 375} +}; + static void dcn314_build_watermark_ranges(struct clk_bw_params *bw_params, struct dcn314_watermarks *table) { int i, num_valid_sets; @@ -708,13 +755,31 @@ static struct clk_mgr_funcs dcn314_funcs = { .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz, .get_dtb_ref_clk_frequency = dcn31_get_dtb_ref_freq_khz, .update_clocks = dcn314_update_clocks, - .init_clocks = dcn31_init_clocks, + .init_clocks = dcn314_init_clocks, .enable_pme_wa = dcn314_enable_pme_wa, .are_clock_states_equal = dcn314_are_clock_states_equal, .notify_wm_ranges = dcn314_notify_wm_ranges }; extern struct clk_mgr_funcs dcn3_fpga_funcs; +static void dcn314_read_ss_info_from_lut(struct clk_mgr_internal *clk_mgr) +{ + uint32_t clock_source; + //uint32_t ssc_enable; + + REG_GET(CLK1_CLK2_BYPASS_CNTL, CLK2_BYPASS_SEL, &clock_source); + //REG_GET(CLK6_0_CLK6_spll_field_8, spll_ssc_en, &ssc_enable); + + if (dcn314_is_spll_ssc_enabled(&clk_mgr->base) && (clock_source < ARRAY_SIZE(ss_info_table.ss_percentage))) { + clk_mgr->dprefclk_ss_percentage = 
ss_info_table.ss_percentage[clock_source]; + + if (clk_mgr->dprefclk_ss_percentage != 0) { + clk_mgr->ss_on_dprefclk = true; + clk_mgr->dprefclk_ss_divider = ss_info_table.ss_divider; + } + } +} + void dcn314_clk_mgr_construct( struct dc_context *ctx, struct clk_mgr_dcn314 *clk_mgr, @@ -782,6 +847,7 @@ void dcn314_clk_mgr_construct( clk_mgr->base.base.dprefclk_khz = 600000; clk_mgr->base.base.clks.ref_dtbclk_khz = 600000; dce_clock_read_ss_info(&clk_mgr->base); + dcn314_read_ss_info_from_lut(&clk_mgr->base); /*if bios enabled SS, driver needs to adjust dtb clock, only enable with correct bios*/ clk_mgr->base.base.bw_params = &dcn314_bw_params; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h index 171f84340eb2fb1d532776ac348cc1fbfad858f5..002c28e807208e584396fdc99dc1822072e8ffa5 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h @@ -28,6 +28,8 @@ #define __DCN314_CLK_MGR_H__ #include "clk_mgr_internal.h" +#define DCN314_NUM_CLOCK_SOURCES 5 + struct dcn314_watermarks; struct dcn314_smu_watermark_set { @@ -40,9 +42,18 @@ struct clk_mgr_dcn314 { struct dcn314_smu_watermark_set smu_wm_set; }; +struct dcn314_ss_info_table { + uint32_t ss_divider; + uint32_t ss_percentage[DCN314_NUM_CLOCK_SOURCES]; +}; + bool dcn314_are_clock_states_equal(struct dc_clocks *a, struct dc_clocks *b); +bool dcn314_is_spll_ssc_enabled(struct clk_mgr *clk_mgr_base); + +void dcn314_init_clocks(struct clk_mgr *clk_mgr); + void dcn314_update_clocks(struct clk_mgr *clk_mgr_base, struct dc_state *context, bool safe_to_lower); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 2d7205058c64abfece9cd154a4aebea8958e0168..aa7c02ba948e9ce63aa84eb7518f9c73c80d107a 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ 
-411,12 +411,9 @@ bool dc_stream_adjust_vmin_vmax(struct dc *dc, * avoid conflicting with firmware updates. */ if (dc->ctx->dce_version > DCE_VERSION_MAX) - if (dc->optimized_required) + if (dc->optimized_required || dc->wm_optimized_required) return false; - if (!memcmp(&stream->adjust, adjust, sizeof(*adjust))) - return true; - stream->adjust.v_total_max = adjust->v_total_max; stream->adjust.v_total_mid = adjust->v_total_mid; stream->adjust.v_total_mid_frame_num = adjust->v_total_mid_frame_num; @@ -2230,6 +2227,7 @@ void dc_post_update_surfaces_to_stream(struct dc *dc) } dc->optimized_required = false; + dc->wm_optimized_required = false; } bool dc_set_generic_gpio_for_stereo(bool enable, @@ -2652,6 +2650,8 @@ enum surface_update_type dc_check_update_surfaces_for_stream( } else if (memcmp(&dc->current_state->bw_ctx.bw.dcn.clk, &dc->clk_mgr->clks, offsetof(struct dc_clocks, prev_p_state_change_support)) != 0) { dc->optimized_required = true; } + + dc->optimized_required |= dc->wm_optimized_required; } return type; @@ -2859,6 +2859,9 @@ static void copy_stream_update_to_stream(struct dc *dc, if (update->vrr_active_fixed) stream->vrr_active_fixed = *update->vrr_active_fixed; + if (update->crtc_timing_adjust) + stream->adjust = *update->crtc_timing_adjust; + if (update->dpms_off) stream->dpms_off = *update->dpms_off; @@ -3519,7 +3522,7 @@ static void commit_planes_for_stream(struct dc *dc, top_pipe_to_program = resource_get_otg_master_for_stream( &context->res_ctx, stream); - + ASSERT(top_pipe_to_program != NULL); for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; @@ -4288,7 +4291,8 @@ static bool full_update_required(struct dc *dc, stream_update->mst_bw_update || stream_update->func_shaper || stream_update->lut3d_func || - stream_update->pending_test_pattern)) + stream_update->pending_test_pattern || + stream_update->crtc_timing_adjust)) return true; if (stream) { @@ -4341,6 +4345,8 @@ static bool 
should_commit_minimal_transition_for_windowed_mpo_odm(struct dc *dc, cur_pipe = resource_get_otg_master_for_stream(&dc->current_state->res_ctx, stream); new_pipe = resource_get_otg_master_for_stream(&context->res_ctx, stream); + if (!cur_pipe || !new_pipe) + return false; cur_is_odm_in_use = resource_get_odm_slice_count(cur_pipe) > 1; new_is_odm_in_use = resource_get_odm_slice_count(new_pipe) > 1; if (cur_is_odm_in_use == new_is_odm_in_use) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 57f0ddd1592399821222c4c5abd3708dbd2bd6b0..9fbdb09697fd5ea16abe86e4f970e80fb764ff7f 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -2194,6 +2194,10 @@ void resource_log_pipe_topology_update(struct dc *dc, struct dc_state *state) for (stream_idx = 0; stream_idx < state->stream_count; stream_idx++) { otg_master = resource_get_otg_master_for_stream( &state->res_ctx, state->streams[stream_idx]); + if (!otg_master || otg_master->stream_res.tg == NULL) { + DC_LOG_DC("topology update: otg_master NULL stream_idx %d!\n", stream_idx); + return; + } slice_count = resource_get_opp_heads_for_otg_master(otg_master, &state->res_ctx, opp_heads); for (slice_idx = 0; slice_idx < slice_count; slice_idx++) { @@ -4986,20 +4990,6 @@ enum dc_status update_dp_encoder_resources_for_test_harness(const struct dc *dc, return DC_OK; } -bool resource_subvp_in_use(struct dc *dc, - struct dc_state *context) -{ - uint32_t i; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - if (dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_NONE) - return true; - } - return false; -} - bool check_subvp_sw_cursor_fallback_req(const struct dc *dc, struct dc_stream_state *stream) { if (!dc->debug.disable_subvp_high_refresh && is_subvp_high_refresh_candidate(stream)) diff --git 
a/drivers/gpu/drm/amd/display/dc/core/dc_state.c b/drivers/gpu/drm/amd/display/dc/core/dc_state.c index 460a8010c79fef0496755ce435f4691e20c3a08e..88c6436b28b69ca7f4791bdc47404cd5f73a5f83 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_state.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c @@ -267,7 +267,8 @@ void dc_state_construct(struct dc *dc, struct dc_state *state) state->clk_mgr = dc->clk_mgr; /* Initialise DIG link encoder resource tracking variables. */ - link_enc_cfg_init(dc, state); + if (dc->res_pool) + link_enc_cfg_init(dc, state); } void dc_state_destruct(struct dc_state *state) @@ -433,8 +434,9 @@ bool dc_state_add_plane( otg_master_pipe = resource_get_otg_master_for_stream( &state->res_ctx, stream); - added = resource_append_dpp_pipes_for_plane_composition(state, - dc->current_state, pool, otg_master_pipe, plane_state); + if (otg_master_pipe) + added = resource_append_dpp_pipes_for_plane_composition(state, + dc->current_state, pool, otg_master_pipe, plane_state); if (added) { stream_status->plane_states[stream_status->plane_count] = diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index f30a341bc09014b156dbe4463b41f84b0f16e083..5d7aa882416b3435a5dcfbaf502a9f326981bc81 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -51,7 +51,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.265" +#define DC_VER "3.2.266" #define MAX_SURFACES 3 #define MAX_PLANES 6 @@ -1036,6 +1036,7 @@ struct dc { /* Require to optimize clocks and bandwidth for added/removed planes */ bool optimized_required; + bool wm_optimized_required; bool idle_optimizations_allowed; bool enable_c20_dtm_b0; diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index a23eebd9933b72ea5c1c2a951a560232250bf34c..ee10941caa5980999044407184e7a41b8548e6b0 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ 
b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -139,6 +139,7 @@ union stream_update_flags { uint32_t wb_update:1; uint32_t dsc_changed : 1; uint32_t mst_bw : 1; + uint32_t crtc_timing_adjust : 1; uint32_t fams_changed : 1; } bits; @@ -325,6 +326,7 @@ struct dc_stream_update { struct dc_3dlut *lut3d_func; struct test_pattern *pending_test_pattern; + struct dc_crtc_timing_adjust *crtc_timing_adjust; }; bool dc_is_stream_unchanged( diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 4f276169e05a91098662edc07fd50d0bc1ed327b..b08ccb8c68bc366386e82a566c452459da0aabdc 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -1140,23 +1140,25 @@ struct dc_panel_config { } ilr; }; +#define MAX_SINKS_PER_LINK 4 + /* * USB4 DPIA BW ALLOCATION STRUCTS */ struct dc_dpia_bw_alloc { - int sink_verified_bw; // The Verified BW that sink can allocated and use that has been verified already - int sink_allocated_bw; // The Actual Allocated BW that sink currently allocated - int sink_max_bw; // The Max BW that sink can require/support + int remote_sink_req_bw[MAX_SINKS_PER_LINK]; // BW requested by remote sinks + int link_verified_bw; // The Verified BW that link can allocated and use that has been verified already + int link_max_bw; // The Max BW that link can require/support + int allocated_bw; // The Actual Allocated BW for this DPIA int estimated_bw; // The estimated available BW for this DPIA int bw_granularity; // BW Granularity + int dp_overhead; // DP overhead in dp tunneling bool bw_alloc_enabled; // The BW Alloc Mode Support is turned ON for all 3: DP-Tx & Dpia & CM bool response_ready; // Response ready from the CM side uint8_t nrd_max_lane_count; // Non-reduced max lane count uint8_t nrd_max_link_rate; // Non-reduced max link rate }; -#define MAX_SINKS_PER_LINK 4 - enum dc_hpd_enable_select { HPD_EN_FOR_ALL_EDP = 0, HPD_EN_FOR_PRIMARY_EDP_ONLY, diff --git 
a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c index 140598f18bbdd4cb4758ec7e9ec17c91286d0ecc..f0458b8f00af842b87ab91feadd71eef4c680e27 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c @@ -782,7 +782,7 @@ static void get_azalia_clock_info_dp( /*audio_dto_module = dpDtoSourceClockInkhz * 10,000; * [khz] ->[100Hz] */ azalia_clock_info->audio_dto_module = - pll_info->dp_dto_source_clock_in_khz * 10; + pll_info->audio_dto_source_clock_in_khz * 10; } void dce_aud_wall_dto_setup( diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c index 5d3f6fa1011e8e33f5e7772bee445cb6602e278d..970644b695cd4f1d96f166cc1786987b460cdafd 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c @@ -975,6 +975,9 @@ static bool dcn31_program_pix_clk( look_up_in_video_optimized_rate_tlb(pix_clk_params->requested_pix_clk_100hz / 10); struct bp_pixel_clock_parameters bp_pc_params = {0}; enum transmitter_color_depth bp_pc_colour_depth = TRANSMITTER_COLOR_DEPTH_24; + + if (clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz != 0) + dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz; // For these signal types Driver to program DP_DTO without calling VBIOS Command table if (dc_is_dp_signal(pix_clk_params->signal_type) || dc_is_virtual_signal(pix_clk_params->signal_type)) { if (e) { @@ -1088,6 +1091,10 @@ static bool get_pixel_clk_frequency_100hz( struct dce110_clk_src *clk_src = TO_DCE110_CLK_SRC(clock_source); unsigned int clock_hz = 0; unsigned int modulo_hz = 0; + unsigned int dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dprefclk_khz; + + if (clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz != 0) + dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz; if (clock_source->id == 
CLOCK_SOURCE_ID_DP_DTO) { clock_hz = REG_READ(PHASE[inst]); @@ -1100,7 +1107,7 @@ static bool get_pixel_clk_frequency_100hz( modulo_hz = REG_READ(MODULO[inst]); if (modulo_hz) *pixel_clk_khz = div_u64((uint64_t)clock_hz* - clock_source->ctx->dc->clk_mgr->dprefclk_khz*10, + dp_dto_ref_khz*10, modulo_hz); else *pixel_clk_khz = 0; diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c index e4a328b45c8a5153c6468486dff940a6eb9435a3..87760600e154dad46e911e28f0b2937e6e012602 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c @@ -183,6 +183,20 @@ bool dcn32_all_pipes_have_stream_and_plane(struct dc *dc, return true; } +bool dcn32_subvp_in_use(struct dc *dc, + struct dc_state *context) +{ + uint32_t i; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_NONE) + return true; + } + return false; +} + bool dcn32_mpo_in_use(struct dc_state *context) { uint32_t i; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index aa68d010cbfd247057da5a210b5209ad7a62ded3..9f37f717a1f86f88c5fa41bc30f477406d70f3b8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -33,7 +33,6 @@ #include "dcn30/dcn30_resource.h" #include "link.h" #include "dc_state_priv.h" -#include "resource.h" #define DC_LOGGER_INIT(logger) @@ -292,7 +291,7 @@ int dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc, /* for subvp + DRR case, if subvp pipes are still present we support pstate */ if (vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported && - resource_subvp_in_use(dc, context)) + dcn32_subvp_in_use(dc, context)) 
vba->DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] = temp_clock_change_support; if (vlevel < context->bw_ctx.dml.vba.soc.num_states && @@ -2273,7 +2272,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, unsigned int dummy_latency_index = 0; int maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb; unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed; - bool subvp_active = resource_subvp_in_use(dc, context); + bool subvp_in_use = dcn32_subvp_in_use(dc, context); unsigned int min_dram_speed_mts_margin; bool need_fclk_lat_as_dummy = false; bool is_subvp_p_drr = false; @@ -2282,7 +2281,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, dc_assert_fp_enabled(); /* need to find dummy latency index for subvp */ - if (subvp_active) { + if (subvp_in_use) { /* Override DRAMClockChangeSupport for SubVP + DRR case where the DRR cannot switch without stretching it's VBLANK */ if (!pstate_en) { context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] = dm_dram_clock_change_vblank_w_mall_sub_vp; @@ -2468,7 +2467,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, dc->clk_mgr->bw_params->clk_table.entries[min_dram_speed_mts_offset].memclk_mhz * 16; } - if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching && !subvp_active) { + if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching && !subvp_in_use) { /* find largest table entry that is lower than dram speed, * but lower than DPM0 still uses DPM0 */ @@ -3528,7 +3527,7 @@ void dcn32_set_clock_limits(const struct _vcs_dpi_soc_bounding_box_st *soc_bb) void dcn32_override_min_req_memclk(struct dc *dc, struct dc_state *context) { // WA: restrict FPO and SubVP to use first non-strobe mode (DCN32 BW issue) - if ((context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching || resource_subvp_in_use(dc, context)) && + if ((context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching || dcn32_subvp_in_use(dc, context)) && 
dc->dml.soc.num_chans <= 8) { int num_mclk_levels = dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_memclk_levels; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c index 3d12dabd39e47d0d2a3fc918dd1d07dbc3902e5d..475c4ec43c013f481a71ad5668a8aef82ac7ba0a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c @@ -166,9 +166,9 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { .num_states = 5, .sr_exit_time_us = 14.0, .sr_enter_plus_exit_time_us = 16.0, - .sr_exit_z8_time_us = 525.0, - .sr_enter_plus_exit_z8_time_us = 715.0, - .fclk_change_latency_us = 20.0, + .sr_exit_z8_time_us = 210.0, + .sr_enter_plus_exit_z8_time_us = 320.0, + .fclk_change_latency_us = 24.0, .usr_retraining_latency_us = 2, .writeback_latency_us = 12.0, diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c index b95bf27f2fe2fe9a943cda43ce121c07c548f5dc..9be5ebf3a8c0ba7805b793b108923e057f2fdfe0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c @@ -6229,7 +6229,7 @@ static void set_calculate_prefetch_schedule_params(struct display_mode_lib_st *m CalculatePrefetchSchedule_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable; CalculatePrefetchSchedule_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable; CalculatePrefetchSchedule_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; - CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; + CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; CalculatePrefetchSchedule_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k]; 
CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled; CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired[k]; @@ -6329,7 +6329,7 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib) mode_lib->ms.NoOfDPPThisState, mode_lib->ms.dpte_group_bytes, s->HostVMInefficiencyFactor, - mode_lib->ms.soc.hostvm_min_page_size_kbytes, + mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels); s->NextMaxVStartup = s->MaxVStartupAllPlanes[j]; @@ -6542,7 +6542,7 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib) mode_lib->ms.cache_display_cfg.plane.HostVMEnable, mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels, mode_lib->ms.cache_display_cfg.plane.GPUVMEnable, - mode_lib->ms.soc.hostvm_min_page_size_kbytes, + mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k], mode_lib->ms.MetaRowBytes[j][k], mode_lib->ms.DPTEBytesPerRow[j][k], @@ -7687,7 +7687,7 @@ dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib) CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels; CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes; - CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; + CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn; CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = 
mode_lib->ms.cache_display_cfg.plane.PTEBufferMode; CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = mode_lib->ms.PTEBufferSizeNotExceededPerState; @@ -7957,7 +7957,7 @@ dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib) UseMinimumDCFCLK_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels; UseMinimumDCFCLK_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable; UseMinimumDCFCLK_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; - UseMinimumDCFCLK_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; + UseMinimumDCFCLK_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; UseMinimumDCFCLK_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; UseMinimumDCFCLK_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled; UseMinimumDCFCLK_params->ImmediateFlipRequirement = s->ImmediateFlipRequiredFinal; @@ -8699,7 +8699,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels; CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes; - CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; + CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn; CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = mode_lib->ms.cache_display_cfg.plane.PTEBufferMode; CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = 
s->dummy_boolean_array[0]; @@ -8805,7 +8805,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc mode_lib->ms.cache_display_cfg.hw.DPPPerSurface, locals->dpte_group_bytes, s->HostVMInefficiencyFactor, - mode_lib->ms.soc.hostvm_min_page_size_kbytes, + mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels); locals->TCalc = 24.0 / locals->DCFCLKDeepSleep; @@ -8995,7 +8995,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc CalculatePrefetchSchedule_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable; CalculatePrefetchSchedule_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable; CalculatePrefetchSchedule_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; - CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; + CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; CalculatePrefetchSchedule_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k]; CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled; CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired[k]; @@ -9240,7 +9240,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc mode_lib->ms.cache_display_cfg.plane.HostVMEnable, mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels, mode_lib->ms.cache_display_cfg.plane.GPUVMEnable, - mode_lib->ms.soc.hostvm_min_page_size_kbytes, + mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, locals->PDEAndMetaPTEBytesFrame[k], locals->MetaRowByte[k], locals->PixelPTEBytesPerRow[k], @@ -9446,13 +9446,13 @@ void 
dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc CalculateWatermarks_params->CompressedBufferSizeInkByte = locals->CompressedBufferSizeInkByte; // Output - CalculateWatermarks_params->Watermark = &s->dummy_watermark; // Watermarks *Watermark - CalculateWatermarks_params->DRAMClockChangeSupport = &mode_lib->ms.support.DRAMClockChangeSupport[0]; - CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = &s->dummy_single_array[0][0]; // dml_float_t *MaxActiveDRAMClockChangeLatencySupported[] - CalculateWatermarks_params->SubViewportLinesNeededInMALL = &mode_lib->ms.SubViewportLinesNeededInMALL[j]; // dml_uint_t SubViewportLinesNeededInMALL[] - CalculateWatermarks_params->FCLKChangeSupport = &mode_lib->ms.support.FCLKChangeSupport[0]; - CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &s->dummy_single[0]; // dml_float_t *MaxActiveFCLKChangeLatencySupported - CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport[0]; + CalculateWatermarks_params->Watermark = &locals->Watermark; // Watermarks *Watermark + CalculateWatermarks_params->DRAMClockChangeSupport = &locals->DRAMClockChangeSupport; + CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = locals->MaxActiveDRAMClockChangeLatencySupported; // dml_float_t *MaxActiveDRAMClockChangeLatencySupported[] + CalculateWatermarks_params->SubViewportLinesNeededInMALL = locals->SubViewportLinesNeededInMALL; // dml_uint_t SubViewportLinesNeededInMALL[] + CalculateWatermarks_params->FCLKChangeSupport = &locals->FCLKChangeSupport; + CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &locals->MaxActiveFCLKChangeLatencySupported; // dml_float_t *MaxActiveFCLKChangeLatencySupported + CalculateWatermarks_params->USRRetrainingSupport = &locals->USRRetrainingSupport; CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( &mode_lib->scratch, diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c 
b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index fa6a93dd9629558120304beae4da239a60268c0d..64d01a9cd68c859db9bcffbc478ef09090b07fbf 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -626,8 +626,8 @@ static void populate_dml_output_cfg_from_stream_state(struct dml_output_cfg_st * if (is_dp2p0_output_encoder(pipe)) out->OutputEncoder[location] = dml_dp2p0; break; - out->OutputEncoder[location] = dml_edp; case SIGNAL_TYPE_EDP: + out->OutputEncoder[location] = dml_edp; break; case SIGNAL_TYPE_HDMI_TYPE_A: case SIGNAL_TYPE_DVI_SINGLE_LINK: diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index fb328cd06cea2c8a00f7450c8ade7408fb4716a9..5660f15da291e9de58637c115e315b07f1cee7a3 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -1354,7 +1354,7 @@ static void build_audio_output( if (state->clk_mgr && (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT || pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)) { - audio_output->pll_info.dp_dto_source_clock_in_khz = + audio_output->pll_info.audio_dto_source_clock_in_khz = state->clk_mgr->funcs->get_dp_ref_clk_frequency( state->clk_mgr); } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index 51dd2ae09b2a6235f822c7eb6c72335f38fafe24..6dd479e8a348502c9b285a38f16650fb7cb4f95e 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -3076,7 +3076,7 @@ void dcn10_prepare_bandwidth( context, false); - dc->optimized_required |= hubbub->funcs->program_watermarks(hubbub, + dc->wm_optimized_required = hubbub->funcs->program_watermarks(hubbub, &context->bw_ctx.bw.dcn.watermarks, 
dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000, true); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index bc71a9b058fedd2c211cf38088758ca6a71480b9..e931342fcf4cf1d4f4b0cf41628cd9f855fa6dac 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -1882,42 +1882,6 @@ static void dcn20_program_pipe( } } -static void update_vmin_vmax_fams(struct dc *dc, - struct dc_state *context) -{ - uint32_t i; - struct drr_params params = {0}; - bool subvp_in_use = resource_subvp_in_use(dc, context); - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - if (resource_is_pipe_type(pipe, OTG_MASTER) && - ((subvp_in_use && dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM && - pipe->stream->allow_freesync) || (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching && pipe->stream->fpo_in_use))) { - if (!pipe->stream->vrr_active_variable && !pipe->stream->vrr_active_fixed) { - struct timing_generator *tg = context->res_ctx.pipe_ctx[i].stream_res.tg; - - /* DRR should be configured already if we're in active variable - * or active fixed, so only program if we're not in this state - */ - params.vertical_total_min = pipe->stream->timing.v_total; - params.vertical_total_max = pipe->stream->timing.v_total; - tg->funcs->set_drr(tg, ¶ms); - } - } else { - if (resource_is_pipe_type(pipe, OTG_MASTER) && - !pipe->stream->vrr_active_variable && - !pipe->stream->vrr_active_fixed) { - struct timing_generator *tg = context->res_ctx.pipe_ctx[i].stream_res.tg; - params.vertical_total_min = 0; - params.vertical_total_max = 0; - tg->funcs->set_drr(tg, ¶ms); - } - } - } -} - void dcn20_program_front_end_for_ctx( struct dc *dc, struct dc_state *context) @@ -1994,7 +1958,6 @@ void dcn20_program_front_end_for_ctx( && context->res_ctx.pipe_ctx[i].stream) 
hws->funcs.blank_pixel_data(dc, &context->res_ctx.pipe_ctx[i], true); - update_vmin_vmax_fams(dc, context); /* Disconnect mpcc */ for (i = 0; i < dc->res_pool->pipe_count; i++) @@ -2196,10 +2159,10 @@ void dcn20_prepare_bandwidth( } /* program dchubbub watermarks: - * For assigning optimized_required, use |= operator since we don't want + * For assigning wm_optimized_required, use |= operator since we don't want * to clear the value if the optimize has not happened yet */ - dc->optimized_required |= hubbub->funcs->program_watermarks(hubbub, + dc->wm_optimized_required |= hubbub->funcs->program_watermarks(hubbub, &context->bw_ctx.bw.dcn.watermarks, dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000, false); @@ -2212,10 +2175,10 @@ void dcn20_prepare_bandwidth( if (hubbub->funcs->program_compbuf_size) { if (context->bw_ctx.dml.ip.min_comp_buffer_size_kbytes) { compbuf_size_kb = context->bw_ctx.dml.ip.min_comp_buffer_size_kbytes; - dc->optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.dml.ip.min_comp_buffer_size_kbytes); + dc->wm_optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.dml.ip.min_comp_buffer_size_kbytes); } else { compbuf_size_kb = context->bw_ctx.bw.dcn.compbuf_size_kb; - dc->optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.bw.dcn.compbuf_size_kb); + dc->wm_optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.bw.dcn.compbuf_size_kb); } hubbub->funcs->program_compbuf_size(hubbub, compbuf_size_kb, false); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h index cbba39d251e5335d6e11604fc327b8ad31284aea..17e014d3bdc8401893847a4f0fd9670d664f65c5 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h @@ -333,6 +333,7 @@ struct clk_mgr { bool force_smu_not_present; bool dc_mode_softmax_enabled; int dprefclk_khz; // Used by program pixel clock in clock source funcs, need to 
figureout where this goes + int dp_dto_source_clock_in_khz; // Used to program DP DTO with ss adjustment on DCN314 int dentist_vco_freq_khz; struct clk_state_registers_and_bypass boot_snapshot; struct clk_bw_params *bw_params; diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h index 1d51fed12e20037b3baf40ed7f3b89095437457e..c958ef37b78a667b1bb9bfb26827ae3e45053715 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/resource.h +++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h @@ -609,9 +609,6 @@ bool dc_resource_acquire_secondary_pipe_for_mpc_odm_legacy( struct pipe_ctx *sec_pipe, bool odm); -bool resource_subvp_in_use(struct dc *dc, - struct dc_state *context); - /* A test harness interface that modifies dp encoder resources in the given dc * state and bypasses the need to revalidate. The interface assumes that the * test harness interface is called with pre-validated link config stored in the diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c index 5fe8b4871c77614eb0fd46421db49fb79197e6f7..3cbfbf8d107e9b62c639ef1618041b8fc09dd9b5 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c @@ -900,11 +900,15 @@ bool link_set_dsc_pps_packet(struct pipe_ctx *pipe_ctx, bool enable, bool immedi { struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc; struct dc_stream_state *stream = pipe_ctx->stream; - DC_LOGGER_INIT(dsc->ctx->logger); - if (!pipe_ctx->stream->timing.flags.DSC || !dsc) + if (!pipe_ctx->stream->timing.flags.DSC) return false; + if (!dsc) + return false; + + DC_LOGGER_INIT(dsc->ctx->logger); + if (enable) { struct dsc_config dsc_cfg; uint8_t dsc_packed_pps[128]; @@ -2005,17 +2009,11 @@ static enum dc_status enable_link_dp(struct dc_state *state, } } - /* - * If the link is DP-over-USB4 do the following: - * - Train with fallback when enabling DPIA link. 
Conventional links are + /* Train with fallback when enabling DPIA link. Conventional links are * trained with fallback during sink detection. - * - Allocate only what the stream needs for bw in Gbps. Inform the CM - * in case stream needs more or less bw from what has been allocated - * earlier at plug time. */ - if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) { + if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) do_fallback = true; - } /* * Temporary w/a to get DP2.0 link rates to work with SST. @@ -2197,6 +2195,32 @@ static enum dc_status enable_link( return status; } +static bool allocate_usb4_bandwidth_for_stream(struct dc_stream_state *stream, int bw) +{ + return true; +} + +static bool allocate_usb4_bandwidth(struct dc_stream_state *stream) +{ + bool ret; + + int bw = dc_bandwidth_in_kbps_from_timing(&stream->timing, + dc_link_get_highest_encoding_format(stream->sink->link)); + + ret = allocate_usb4_bandwidth_for_stream(stream, bw); + + return ret; +} + +static bool deallocate_usb4_bandwidth(struct dc_stream_state *stream) +{ + bool ret; + + ret = allocate_usb4_bandwidth_for_stream(stream, 0); + + return ret; +} + void link_set_dpms_off(struct pipe_ctx *pipe_ctx) { struct dc *dc = pipe_ctx->stream->ctx->dc; @@ -2232,6 +2256,9 @@ void link_set_dpms_off(struct pipe_ctx *pipe_ctx) update_psp_stream_config(pipe_ctx, true); dc->hwss.blank_stream(pipe_ctx); + if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) + deallocate_usb4_bandwidth(pipe_ctx->stream); + if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) deallocate_mst_payload(pipe_ctx); else if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT && @@ -2474,6 +2501,9 @@ void link_set_dpms_on( } } + if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) + allocate_usb4_bandwidth(pipe_ctx->stream); + if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) allocate_mst_payload(pipe_ctx); else if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT && diff --git 
a/drivers/gpu/drm/amd/display/dc/link/link_validation.c b/drivers/gpu/drm/amd/display/dc/link/link_validation.c index b45fda96eaf649bf16f291df2294d787680e0287..8fe66c3678508d9aee6779fa25cd6128e1f30832 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_validation.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_validation.c @@ -346,23 +346,61 @@ enum dc_status link_validate_mode_timing( return DC_OK; } +/* + * This function calculates the bandwidth required for the stream timing + * and aggregates the stream bandwidth for the respective dpia link + * + * @stream: pointer to the dc_stream_state struct instance + * @num_streams: number of streams to be validated + * + * return: true if validation is succeeded + */ bool link_validate_dpia_bandwidth(const struct dc_stream_state *stream, const unsigned int num_streams) { - bool ret = true; - int bw_needed[MAX_DPIA_NUM]; - struct dc_link *link[MAX_DPIA_NUM]; - - if (!num_streams || num_streams > MAX_DPIA_NUM) - return ret; + int bw_needed[MAX_DPIA_NUM] = {0}; + struct dc_link *dpia_link[MAX_DPIA_NUM] = {0}; + int num_dpias = 0; for (uint8_t i = 0; i < num_streams; ++i) { + if (stream[i].signal == SIGNAL_TYPE_DISPLAY_PORT) { + /* new dpia sst stream, check whether it exceeds max dpia */ + if (num_dpias >= MAX_DPIA_NUM) + return false; - link[i] = stream[i].link; - bw_needed[i] = dc_bandwidth_in_kbps_from_timing(&stream[i].timing, - dc_link_get_highest_encoding_format(link[i])); + dpia_link[num_dpias] = stream[i].link; + bw_needed[num_dpias] = dc_bandwidth_in_kbps_from_timing(&stream[i].timing, + dc_link_get_highest_encoding_format(dpia_link[num_dpias])); + num_dpias++; + } else if (stream[i].signal == SIGNAL_TYPE_DISPLAY_PORT_MST) { + uint8_t j = 0; + /* check whether its a known dpia link */ + for (; j < num_dpias; ++j) { + if (dpia_link[j] == stream[i].link) + break; + } + + if (j == num_dpias) { + /* new dpia mst stream, check whether it exceeds max dpia */ + if (num_dpias >= MAX_DPIA_NUM) + return false; + else { + 
dpia_link[j] = stream[i].link; + num_dpias++; + } + } + + bw_needed[j] += dc_bandwidth_in_kbps_from_timing(&stream[i].timing, + dc_link_get_highest_encoding_format(dpia_link[j])); + } } - ret = dpia_validate_usb4_bw(link, bw_needed, num_streams); + /* Include dp overheads */ + for (uint8_t i = 0; i < num_dpias; ++i) { + int dp_overhead = 0; + + dp_overhead = link_dp_dpia_get_dp_overhead_in_dp_tunneling(dpia_link[i]); + bw_needed[i] += dp_overhead; + } - return ret; + return dpia_validate_usb4_bw(dpia_link, bw_needed, num_dpias); } diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c index 982eda3c46f5680af8c60042e4cc7fc8909f68d3..6af42ba9885c054ead528e11d14007622af098a8 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c @@ -82,25 +82,33 @@ bool dpia_query_hpd_status(struct dc_link *link) { union dmub_rb_cmd cmd = {0}; struct dc_dmub_srv *dmub_srv = link->ctx->dmub_srv; - bool is_hpd_high = false; /* prepare QUERY_HPD command */ cmd.query_hpd.header.type = DMUB_CMD__QUERY_HPD_STATE; cmd.query_hpd.data.instance = link->link_id.enum_id - ENUM_ID_1; cmd.query_hpd.data.ch_type = AUX_CHANNEL_DPIA; - /* Return HPD status reported by DMUB if query successfully executed. 
*/ - if (dc_wake_and_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) && - cmd.query_hpd.data.status == AUX_RET_SUCCESS) - is_hpd_high = cmd.query_hpd.data.result; - - DC_LOG_DEBUG("%s: link(%d) dpia(%d) cmd_status(%d) result(%d)\n", - __func__, - link->link_index, - link->link_id.enum_id - ENUM_ID_1, - cmd.query_hpd.data.status, - cmd.query_hpd.data.result); - - return is_hpd_high; + /* Query dpia hpd status from dmub */ + if (dc_wake_and_execute_dmub_cmd(dmub_srv->ctx, &cmd, + DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) && + cmd.query_hpd.data.status == AUX_RET_SUCCESS) { + DC_LOG_DEBUG("%s: for link(%d) dpia(%d) success, current_hpd_status(%d) new_hpd_status(%d)\n", + __func__, + link->link_index, + link->link_id.enum_id - ENUM_ID_1, + link->hpd_status, + cmd.query_hpd.data.result); + link->hpd_status = cmd.query_hpd.data.result; + } else { + DC_LOG_ERROR("%s: for link(%d) dpia(%d) failed with status(%d), current_hpd_status(%d) new_hpd_status(0)\n", + __func__, + link->link_index, + link->link_id.enum_id - ENUM_ID_1, + cmd.query_hpd.data.status, + link->hpd_status); + link->hpd_status = false; + } + + return link->hpd_status; } diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c index a7aa8c9da868fcf81a335b3d1384aeb8e37a1f42..dd0d2b206462c927c5f68b355498e71250c154b9 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c @@ -54,12 +54,18 @@ static bool get_bw_alloc_proceed_flag(struct dc_link *tmp) static void reset_bw_alloc_struct(struct dc_link *link) { link->dpia_bw_alloc_config.bw_alloc_enabled = false; - link->dpia_bw_alloc_config.sink_verified_bw = 0; - link->dpia_bw_alloc_config.sink_max_bw = 0; + link->dpia_bw_alloc_config.link_verified_bw = 0; + link->dpia_bw_alloc_config.link_max_bw = 0; + link->dpia_bw_alloc_config.allocated_bw = 0; 
link->dpia_bw_alloc_config.estimated_bw = 0; link->dpia_bw_alloc_config.bw_granularity = 0; + link->dpia_bw_alloc_config.dp_overhead = 0; link->dpia_bw_alloc_config.response_ready = false; - link->dpia_bw_alloc_config.sink_allocated_bw = 0; + link->dpia_bw_alloc_config.nrd_max_lane_count = 0; + link->dpia_bw_alloc_config.nrd_max_link_rate = 0; + for (int i = 0; i < MAX_SINKS_PER_LINK; i++) + link->dpia_bw_alloc_config.remote_sink_req_bw[i] = 0; + DC_LOG_DEBUG("reset usb4 bw alloc of link(%d)\n", link->link_index); } #define BW_GRANULARITY_0 4 // 0.25 Gbps @@ -210,8 +216,8 @@ static int get_host_router_total_dp_tunnel_bw(const struct dc *dc, uint8_t hr_in link_dpia_primary->dpia_bw_alloc_config.bw_alloc_enabled) && (link_dpia_secondary->hpd_status && link_dpia_secondary->dpia_bw_alloc_config.bw_alloc_enabled)) { - total_bw += link_dpia_primary->dpia_bw_alloc_config.estimated_bw + - link_dpia_secondary->dpia_bw_alloc_config.sink_allocated_bw; + total_bw += link_dpia_primary->dpia_bw_alloc_config.estimated_bw + + link_dpia_secondary->dpia_bw_alloc_config.allocated_bw; } else if (link_dpia_primary->hpd_status && link_dpia_primary->dpia_bw_alloc_config.bw_alloc_enabled) { total_bw = link_dpia_primary->dpia_bw_alloc_config.estimated_bw; @@ -264,7 +270,7 @@ static void set_usb4_req_bw_req(struct dc_link *link, int req_bw) /* Error check whether requested and allocated are equal */ req_bw = requested_bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity); - if (req_bw == link->dpia_bw_alloc_config.sink_allocated_bw) { + if (req_bw == link->dpia_bw_alloc_config.allocated_bw) { DC_LOG_ERROR("%s: Request bw equals to allocated bw for link(%d)\n", __func__, link->link_index); } @@ -387,9 +393,9 @@ void dpia_handle_bw_alloc_response(struct dc_link *link, uint8_t bw, uint8_t res DC_LOG_DEBUG("%s: BW REQ SUCCESS for DP-TX Request for link(%d)\n", __func__, link->link_index); DC_LOG_DEBUG("%s: current allocated_bw(%d), new allocated_bw(%d)\n", - __func__, 
link->dpia_bw_alloc_config.sink_allocated_bw, bw_needed); + __func__, link->dpia_bw_alloc_config.allocated_bw, bw_needed); - link->dpia_bw_alloc_config.sink_allocated_bw = bw_needed; + link->dpia_bw_alloc_config.allocated_bw = bw_needed; link->dpia_bw_alloc_config.response_ready = true; break; @@ -427,8 +433,8 @@ int dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int pea if (link->hpd_status && peak_bw > 0) { // If DP over USB4 then we need to check BW allocation - link->dpia_bw_alloc_config.sink_max_bw = peak_bw; - set_usb4_req_bw_req(link, link->dpia_bw_alloc_config.sink_max_bw); + link->dpia_bw_alloc_config.link_max_bw = peak_bw; + set_usb4_req_bw_req(link, link->dpia_bw_alloc_config.link_max_bw); do { if (timeout > 0) @@ -440,8 +446,8 @@ int dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int pea if (!timeout) ret = 0;// ERROR TIMEOUT waiting for response for allocating bw - else if (link->dpia_bw_alloc_config.sink_allocated_bw > 0) - ret = link->dpia_bw_alloc_config.sink_allocated_bw; + else if (link->dpia_bw_alloc_config.allocated_bw > 0) + ret = link->dpia_bw_alloc_config.allocated_bw; } //2. 
Cold Unplug else if (!link->hpd_status) @@ -450,7 +456,6 @@ int dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int pea out: return ret; } - bool link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int req_bw) { bool ret = false; @@ -458,7 +463,7 @@ bool link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int r DC_LOG_DEBUG("%s: ENTER: link(%d), hpd_status(%d), current allocated_bw(%d), req_bw(%d)\n", __func__, link->link_index, link->hpd_status, - link->dpia_bw_alloc_config.sink_allocated_bw, req_bw); + link->dpia_bw_alloc_config.allocated_bw, req_bw); if (!get_bw_alloc_proceed_flag(link)) goto out; @@ -523,3 +528,30 @@ bool dpia_validate_usb4_bw(struct dc_link **link, int *bw_needed_per_dpia, const return ret; } + +int link_dp_dpia_get_dp_overhead_in_dp_tunneling(struct dc_link *link) +{ + int dp_overhead = 0, link_mst_overhead = 0; + + if (!get_bw_alloc_proceed_flag((link))) + return dp_overhead; + + /* if its mst link, add MTPH overhead */ + if ((link->type == dc_connection_mst_branch) && + !link->dpcd_caps.channel_coding_cap.bits.DP_128b_132b_SUPPORTED) { + /* For 8b/10b encoding: MTP is 64 time slots long, slot 0 is used for MTPH + * MST overhead is 1/64 of link bandwidth (excluding any overhead) + */ + const struct dc_link_settings *link_cap = + dc_link_get_link_cap(link); + uint32_t link_bw_in_kbps = (uint32_t)link_cap->link_rate * + (uint32_t)link_cap->lane_count * + LINK_RATE_REF_FREQ_IN_KHZ * 8; + link_mst_overhead = (link_bw_in_kbps / 64) + ((link_bw_in_kbps % 64) ? 
1 : 0); + } + + /* add all the overheads */ + dp_overhead = link_mst_overhead; + + return dp_overhead; +} diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h index 981bc4eb6120e76ad959435be7ad716cdc498926..3b6d8494f9d5da4ceb05711c9596007ac73f08a2 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h @@ -99,4 +99,13 @@ void dpia_handle_bw_alloc_response(struct dc_link *link, uint8_t bw, uint8_t res */ bool dpia_validate_usb4_bw(struct dc_link **link, int *bw_needed, const unsigned int num_dpias); +/* + * Obtain all the DP overheads in dp tunneling for the dpia link + * + * @link: pointer to the dc_link struct instance + * + * return: DP overheads in DP tunneling + */ +int link_dp_dpia_get_dp_overhead_in_dp_tunneling(struct dc_link *link); + #endif /* DC_INC_LINK_DP_DPIA_BW_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index 7f1196528218692c98f1f15375f153dfe56fe514..046d3e205415311cd63a98aa3c0e59c8aaea2e89 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -930,8 +930,8 @@ bool edp_get_replay_state(const struct dc_link *link, uint64_t *state) bool edp_setup_replay(struct dc_link *link, const struct dc_stream_state *stream) { /* To-do: Setup Replay */ - struct dc *dc = link->ctx->dc; - struct dmub_replay *replay = dc->res_pool->replay; + struct dc *dc; + struct dmub_replay *replay; int i; unsigned int panel_inst; struct replay_context replay_context = { 0 }; @@ -947,6 +947,10 @@ bool edp_setup_replay(struct dc_link *link, const struct dc_stream_state *stream if (!link) return false; + dc = link->ctx->dc; + + replay = dc->res_pool->replay; + if (!replay) return 
false; @@ -975,8 +979,7 @@ bool edp_setup_replay(struct dc_link *link, const struct dc_stream_state *stream replay_context.line_time_in_ns = lineTimeInNs; - if (replay) - link->replay_settings.replay_feature_enabled = + link->replay_settings.replay_feature_enabled = replay->funcs->replay_copy_settings(replay, link, &replay_context, panel_inst); if (link->replay_settings.replay_feature_enabled) { diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c index 91ea0d4da06a9443bb199759fde0c75ae44fc8f3..82349354332548e160494c23bee15acaa18b7630 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c @@ -166,12 +166,6 @@ static bool optc32_disable_crtc(struct timing_generator *optc) { struct optc *optc1 = DCN10TG_FROM_TG(optc); - /* disable otg request until end of the first line - * in the vertical blank region - */ - REG_UPDATE(OTG_CONTROL, - OTG_MASTER_EN, 0); - REG_UPDATE_5(OPTC_DATA_SOURCE_SELECT, OPTC_SEG0_SRC_SEL, 0xf, OPTC_SEG1_SRC_SEL, 0xf, @@ -179,6 +173,15 @@ static bool optc32_disable_crtc(struct timing_generator *optc) OPTC_SEG3_SRC_SEL, 0xf, OPTC_NUM_OF_INPUT_SEGMENT, 0); + REG_UPDATE(OPTC_MEMORY_CONFIG, + OPTC_MEM_SEL, 0); + + /* disable otg request until end of the first line + * in the vertical blank region + */ + REG_UPDATE(OTG_CONTROL, + OTG_MASTER_EN, 0); + REG_UPDATE(CONTROL, VTG0_ENABLE, 0); @@ -205,6 +208,13 @@ static void optc32_disable_phantom_otg(struct timing_generator *optc) { struct optc *optc1 = DCN10TG_FROM_TG(optc); + REG_UPDATE_5(OPTC_DATA_SOURCE_SELECT, + OPTC_SEG0_SRC_SEL, 0xf, + OPTC_SEG1_SRC_SEL, 0xf, + OPTC_SEG2_SRC_SEL, 0xf, + OPTC_SEG3_SRC_SEL, 0xf, + OPTC_NUM_OF_INPUT_SEGMENT, 0); + REG_UPDATE(OTG_CONTROL, OTG_MASTER_EN, 0); } diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c index 
08a59cf449cae5c27fe7dbe8fc1b2f847f462f9f..5b154750885030e171a483d30e014aa8f4bff8a1 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c @@ -138,12 +138,6 @@ static bool optc35_disable_crtc(struct timing_generator *optc) { struct optc *optc1 = DCN10TG_FROM_TG(optc); - /* disable otg request until end of the first line - * in the vertical blank region - */ - REG_UPDATE(OTG_CONTROL, - OTG_MASTER_EN, 0); - REG_UPDATE_5(OPTC_DATA_SOURCE_SELECT, OPTC_SEG0_SRC_SEL, 0xf, OPTC_SEG1_SRC_SEL, 0xf, @@ -151,6 +145,15 @@ static bool optc35_disable_crtc(struct timing_generator *optc) OPTC_SEG3_SRC_SEL, 0xf, OPTC_NUM_OF_INPUT_SEGMENT, 0); + REG_UPDATE(OPTC_MEMORY_CONFIG, + OPTC_MEM_SEL, 0); + + /* disable otg request until end of the first line + * in the vertical blank region + */ + REG_UPDATE(OTG_CONTROL, + OTG_MASTER_EN, 0); + REG_UPDATE(CONTROL, VTG0_ENABLE, 0); diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index ac04a9c9a3d86808000942fd4eae12d5f9fdea66..c4d71e7f18af47ba47dbc89e1a9098a0a4eade04 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -1899,7 +1899,7 @@ int dcn32_populate_dml_pipes_from_context( static struct dc_cap_funcs cap_funcs = { .get_dcc_compression_cap = dcn20_get_dcc_compression_cap, - .get_subvp_en = resource_subvp_in_use, + .get_subvp_en = dcn32_subvp_in_use, }; void dcn32_calculate_wm_and_dlg(struct dc *dc, struct dc_state *context, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h index 62611acd4bcb522c78fafdaa8d811101b65b5f42..0c87b0fabba7d96ff38180900e41f1438419912c 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h +++ 
b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h @@ -131,6 +131,9 @@ void dcn32_merge_pipes_for_subvp(struct dc *dc, bool dcn32_all_pipes_have_stream_and_plane(struct dc *dc, struct dc_state *context); +bool dcn32_subvp_in_use(struct dc *dc, + struct dc_state *context); + bool dcn32_mpo_in_use(struct dc_state *context); bool dcn32_any_surfaces_rotated(struct dc *dc, struct dc_state *context); diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c index e1ab207c46f15b1c6dd6e13ec35f6049674c0444..74412e5f03fefbaa9350982ac92bc528cc8e80e8 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c @@ -1574,7 +1574,7 @@ static void dcn321_destroy_resource_pool(struct resource_pool **pool) static struct dc_cap_funcs cap_funcs = { .get_dcc_compression_cap = dcn20_get_dcc_compression_cap, - .get_subvp_en = resource_subvp_in_use, + .get_subvp_en = dcn32_subvp_in_use, }; static void dcn321_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) diff --git a/drivers/gpu/drm/amd/display/include/audio_types.h b/drivers/gpu/drm/amd/display/include/audio_types.h index 66a54da0641ce11feb10e1d777395f9bcd85f658..915a031a43cb286fdb03f2fb2788d0fa9e539b59 100644 --- a/drivers/gpu/drm/amd/display/include/audio_types.h +++ b/drivers/gpu/drm/amd/display/include/audio_types.h @@ -64,7 +64,7 @@ enum audio_dto_source { /* PLL information required for AZALIA DTO calculation */ struct audio_pll_info { - uint32_t dp_dto_source_clock_in_khz; + uint32_t audio_dto_source_clock_in_khz; uint32_t feed_back_divider; enum audio_dto_source dto_source; bool ss_enabled; diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h index 7ee3d291120d5429d879745c2e63af38cd79f371..6f80bfa7e41ac9c1bdd2faaba4c298cc3f4f9d34 
100644 --- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h @@ -8707,10 +8707,10 @@ #define regBIF_BX1_MM_CFGREGS_CNTL_BASE_IDX 2 #define regBIF_BX1_BX_RESET_CNTL 0x00f0 #define regBIF_BX1_BX_RESET_CNTL_BASE_IDX 2 -#define regBIF_BX1_INTERRUPT_CNTL 0x8e11 -#define regBIF_BX1_INTERRUPT_CNTL_BASE_IDX 5 -#define regBIF_BX1_INTERRUPT_CNTL2 0x8e12 -#define regBIF_BX1_INTERRUPT_CNTL2_BASE_IDX 5 +#define regBIF_BX1_INTERRUPT_CNTL 0x00f1 +#define regBIF_BX1_INTERRUPT_CNTL_BASE_IDX 2 +#define regBIF_BX1_INTERRUPT_CNTL2 0x00f2 +#define regBIF_BX1_INTERRUPT_CNTL2_BASE_IDX 2 #define regBIF_BX1_CLKREQB_PAD_CNTL 0x00f8 #define regBIF_BX1_CLKREQB_PAD_CNTL_BASE_IDX 2 #define regBIF_BX1_BIF_FEATURES_CONTROL_MISC 0x00fb diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index f3cb490fe79b16baa5a917c8304b6fbfcfeb8729..087d57850304c45193a7f5de336953c1dec9cbba 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -4349,11 +4349,19 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB, (void *)&value, &size)) seq_printf(m, "\t%u mV (VDDNB)\n", value); size = sizeof(uint32_t); - if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_AVG_POWER, (void *)&query, &size)) - seq_printf(m, "\t%u.%02u W (average GPU)\n", query >> 8, query & 0xff); + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_AVG_POWER, (void *)&query, &size)) { + if (adev->flags & AMD_IS_APU) + seq_printf(m, "\t%u.%02u W (average SoC including CPU)\n", query >> 8, query & 0xff); + else + seq_printf(m, "\t%u.%02u W (average SoC)\n", query >> 8, query & 0xff); + } size = sizeof(uint32_t); - if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_INPUT_POWER, (void *)&query, &size)) - seq_printf(m, "\t%u.%02u W (current GPU)\n", query >> 8, query & 0xff); + if 
(!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_INPUT_POWER, (void *)&query, &size)) { + if (adev->flags & AMD_IS_APU) + seq_printf(m, "\t%u.%02u W (current SoC including CPU)\n", query >> 8, query & 0xff); + else + seq_printf(m, "\t%u.%02u W (current SoC)\n", query >> 8, query & 0xff); + } size = sizeof(value); seq_printf(m, "\n"); @@ -4379,9 +4387,9 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a /* VCN clocks */ if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCN_POWER_STATE, (void *)&value, &size)) { if (!value) { - seq_printf(m, "VCN: Disabled\n"); + seq_printf(m, "VCN: Powered down\n"); } else { - seq_printf(m, "VCN: Enabled\n"); + seq_printf(m, "VCN: Powered up\n"); if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_DCLK, (void *)&value, &size)) seq_printf(m, "\t%u MHz (DCLK)\n", value/100); if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_VCLK, (void *)&value, &size)) @@ -4393,9 +4401,9 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a /* UVD clocks */ if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_POWER, (void *)&value, &size)) { if (!value) { - seq_printf(m, "UVD: Disabled\n"); + seq_printf(m, "UVD: Powered down\n"); } else { - seq_printf(m, "UVD: Enabled\n"); + seq_printf(m, "UVD: Powered up\n"); if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_DCLK, (void *)&value, &size)) seq_printf(m, "\t%u MHz (DCLK)\n", value/100); if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_VCLK, (void *)&value, &size)) @@ -4407,9 +4415,9 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a /* VCE clocks */ if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCE_POWER, (void *)&value, &size)) { if (!value) { - seq_printf(m, "VCE: Disabled\n"); + seq_printf(m, "VCE: Powered down\n"); } else { - seq_printf(m, "VCE: Enabled\n"); + seq_printf(m, "VCE: Powered up\n"); if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCE_ECCLK, (void *)&value, 
&size)) seq_printf(m, "\t%u MHz (ECCLK)\n", value/100); } diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/process_pptables_v1_0.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/process_pptables_v1_0.c index f2a55c1413f597a4d643d1a2c99367517bdff17e..17882f8dfdd34f92d5d37a9b0ee37f4a7d1bb406 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/process_pptables_v1_0.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/process_pptables_v1_0.c @@ -200,7 +200,7 @@ static int get_platform_power_management_table( struct pp_hwmgr *hwmgr, ATOM_Tonga_PPM_Table *atom_ppm_table) { - struct phm_ppm_table *ptr = kzalloc(sizeof(ATOM_Tonga_PPM_Table), GFP_KERNEL); + struct phm_ppm_table *ptr = kzalloc(sizeof(*ptr), GFP_KERNEL); struct phm_ppt_v1_information *pp_table_information = (struct phm_ppt_v1_information *)(hwmgr->pptable); diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c index b1a8799e2dee320390d239c6242600d4d30cdc39..aa91730e4eaffdf7760c844a7722aa1dedcb42d9 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c @@ -3999,6 +3999,7 @@ static int smu7_read_sensor(struct pp_hwmgr *hwmgr, int idx, uint32_t sclk, mclk, activity_percent; uint32_t offset, val_vid; struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct amdgpu_device *adev = hwmgr->adev; /* size must be at least 4 bytes for all sensors */ if (*size < 4) @@ -4042,7 +4043,21 @@ static int smu7_read_sensor(struct pp_hwmgr *hwmgr, int idx, *size = 4; return 0; case AMDGPU_PP_SENSOR_GPU_INPUT_POWER: - return smu7_get_gpu_power(hwmgr, (uint32_t *)value); + if ((adev->asic_type != CHIP_HAWAII) && + (adev->asic_type != CHIP_BONAIRE) && + (adev->asic_type != CHIP_FIJI) && + (adev->asic_type != CHIP_TONGA)) + return smu7_get_gpu_power(hwmgr, (uint32_t *)value); + else + return -EOPNOTSUPP; + case AMDGPU_PP_SENSOR_GPU_AVG_POWER: + if ((adev->asic_type != CHIP_HAWAII) && + 
(adev->asic_type != CHIP_BONAIRE) && + (adev->asic_type != CHIP_FIJI) && + (adev->asic_type != CHIP_TONGA)) + return -EOPNOTSUPP; + else + return smu7_get_gpu_power(hwmgr, (uint32_t *)value); case AMDGPU_PP_SENSOR_VDDGFX: if ((data->vr_config & VRCONF_VDDGFX_MASK) == (VR_SVI2_PLANE_2 << VRCONF_VDDGFX_SHIFT)) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index f1440869d1ce0597fee3f5819c80db505136cbfa..dd9bcbd630a1f9465285127dd23999f425bcc986 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -1530,7 +1530,6 @@ static int aldebaran_i2c_control_init(struct smu_context *smu) smu_i2c->port = 0; mutex_init(&smu_i2c->mutex); control->owner = THIS_MODULE; - control->class = I2C_CLASS_SPD; control->dev.parent = &adev->pdev->dev; control->algo = &aldebaran_i2c_algo; snprintf(control->name, sizeof(control->name), "AMDGPU SMU 0"); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 231122622a9c06c8b255a48fcdd4231ffa58b7a9..a9b25faa63e468d0069ea08acfd7b90b1b36f056 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2696,7 +2696,6 @@ static int smu_v13_0_0_i2c_control_init(struct smu_context *smu) smu_i2c->port = i; mutex_init(&smu_i2c->mutex); control->owner = THIS_MODULE; - control->class = I2C_CLASS_SPD; control->dev.parent = &adev->pdev->dev; control->algo = &smu_v13_0_0_i2c_algo; snprintf(control->name, sizeof(control->name), "AMDGPU SMU %d", i); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 4ebc6b421c2cb44baea23225a34e31e9c47464b5..3c98a8a0386a2612d0470dd6dbd767a6ecc308b0 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ 
-970,7 +970,9 @@ static int smu_v13_0_6_print_clks(struct smu_context *smu, char *buf, int size, if (i < (clocks.num_levels - 1)) clk2 = clocks.data[i + 1].clocks_in_khz / 1000; - if (curr_clk >= clk1 && curr_clk < clk2) { + if (curr_clk == clk1) { + level = i; + } else if (curr_clk >= clk1 && curr_clk < clk2) { level = (curr_clk - clk1) <= (clk2 - curr_clk) ? i : i + 1; @@ -1936,7 +1938,6 @@ static int smu_v13_0_6_i2c_control_init(struct smu_context *smu) smu_i2c->port = i; mutex_init(&smu_i2c->mutex); control->owner = THIS_MODULE; - control->class = I2C_CLASS_SPD; control->dev.parent = &adev->pdev->dev; control->algo = &smu_v13_0_6_i2c_algo; snprintf(control->name, sizeof(control->name), "AMDGPU SMU %d", i); @@ -2235,17 +2236,18 @@ static int smu_v13_0_6_mode2_reset(struct smu_context *smu) continue; } - if (ret) { - dev_err(adev->dev, - "failed to send mode2 message \tparam: 0x%08x error code %d\n", - SMU_RESET_MODE_2, ret); + if (ret) goto out; - } + } while (ret == -ETIME && timeout); out: mutex_unlock(&smu->message_lock); + if (ret) + dev_err(adev->dev, "failed to send mode2 reset, error code %d", + ret); + return ret; } diff --git a/drivers/gpu/drm/ast/ast_i2c.c b/drivers/gpu/drm/ast/ast_i2c.c index 0e845e7acd9b5aeb091f4f473706968c6358056c..e5d3f7121de4206f4d1af450e450411e349eb1d8 100644 --- a/drivers/gpu/drm/ast/ast_i2c.c +++ b/drivers/gpu/drm/ast/ast_i2c.c @@ -120,7 +120,6 @@ struct ast_i2c_chan *ast_i2c_create(struct drm_device *dev) return NULL; i2c->adapter.owner = THIS_MODULE; - i2c->adapter.class = I2C_CLASS_DDC; i2c->adapter.dev.parent = dev->dev; i2c->dev = dev; i2c_set_adapdata(&i2c->adapter, i2c); diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c index 52d91a0df85e9ba3f6082b71b9218dd0f53e8a01..aca5bb0866f886c05f5914ec647242ee1326509a 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c @@ -515,7 +515,6 @@ static struct i2c_adapter 
*dw_hdmi_i2c_adapter(struct dw_hdmi *hdmi) init_completion(&i2c->cmp); adap = &i2c->adap; - adap->class = I2C_CLASS_DDC; adap->owner = THIS_MODULE; adap->dev.parent = hdmi->dev; adap->algo = &dw_hdmi_algorithm; diff --git a/drivers/gpu/drm/display/drm_dp_helper.c b/drivers/gpu/drm/display/drm_dp_helper.c index d72b6f9a352c10c13d6a66509d29372962dc4cef..b1ca3a1100dabbbad98279a65654a85266953e30 100644 --- a/drivers/gpu/drm/display/drm_dp_helper.c +++ b/drivers/gpu/drm/display/drm_dp_helper.c @@ -2102,7 +2102,6 @@ int drm_dp_aux_register(struct drm_dp_aux *aux) if (!aux->ddc.algo) drm_dp_aux_init(aux); - aux->ddc.class = I2C_CLASS_DDC; aux->ddc.owner = THIS_MODULE; aux->ddc.dev.parent = aux->dev; diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c index 8ca01a6bf645d6f79ed202b2a45cc7265a7c7fa9..bd6c24d4213cdf2f6bcb132848330f43e4546efd 100644 --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c @@ -5926,7 +5926,6 @@ static int drm_dp_mst_register_i2c_bus(struct drm_dp_mst_port *port) aux->ddc.algo_data = aux; aux->ddc.retries = 3; - aux->ddc.class = I2C_CLASS_DDC; aux->ddc.owner = THIS_MODULE; /* FIXME: set the kdev of the port's connector as parent */ aux->ddc.dev.parent = parent_dev; diff --git a/drivers/gpu/drm/gma500/cdv_intel_dp.c b/drivers/gpu/drm/gma500/cdv_intel_dp.c index 8992a95076f29e548d7576b066fb1c75ac1b46ee..dd1eb7e9877d46ba7dae136f5992ea510b627797 100644 --- a/drivers/gpu/drm/gma500/cdv_intel_dp.c +++ b/drivers/gpu/drm/gma500/cdv_intel_dp.c @@ -855,7 +855,6 @@ cdv_intel_dp_i2c_init(struct gma_connector *connector, memset(&intel_dp->adapter, '\0', sizeof (intel_dp->adapter)); intel_dp->adapter.owner = THIS_MODULE; - intel_dp->adapter.class = I2C_CLASS_DDC; strncpy (intel_dp->adapter.name, name, sizeof(intel_dp->adapter.name) - 1); intel_dp->adapter.name[sizeof(intel_dp->adapter.name) - 1] = '\0'; intel_dp->adapter.algo_data = &intel_dp->algo; diff 
--git a/drivers/gpu/drm/gma500/intel_gmbus.c b/drivers/gpu/drm/gma500/intel_gmbus.c index 09cedabf4776dffb065a74157a8a04a857b77047..aa45509859f21ab9af69d2178473a7d091822068 100644 --- a/drivers/gpu/drm/gma500/intel_gmbus.c +++ b/drivers/gpu/drm/gma500/intel_gmbus.c @@ -411,7 +411,6 @@ int gma_intel_setup_gmbus(struct drm_device *dev) struct intel_gmbus *bus = &dev_priv->gmbus[i]; bus->adapter.owner = THIS_MODULE; - bus->adapter.class = I2C_CLASS_DDC; snprintf(bus->adapter.name, sizeof(bus->adapter.name), "gma500 gmbus %s", diff --git a/drivers/gpu/drm/gma500/oaktrail_hdmi_i2c.c b/drivers/gpu/drm/gma500/oaktrail_hdmi_i2c.c index fc9a34ed58bd136f298a1cffb23ec2f88b8f2d70..6daa6669ed2374bf858bf68cefb7cd43007a9b28 100644 --- a/drivers/gpu/drm/gma500/oaktrail_hdmi_i2c.c +++ b/drivers/gpu/drm/gma500/oaktrail_hdmi_i2c.c @@ -168,7 +168,6 @@ static struct i2c_adapter oaktrail_hdmi_i2c_adapter = { .name = "oaktrail_hdmi_i2c", .nr = 3, .owner = THIS_MODULE, - .class = I2C_CLASS_DDC, .algo = &oaktrail_hdmi_i2c_algorithm, }; diff --git a/drivers/gpu/drm/gma500/psb_intel_sdvo.c b/drivers/gpu/drm/gma500/psb_intel_sdvo.c index d6fd5d72621609f40612a0ece610970b82707193..e4f914decebaecca7ac0e464fe9608a93d691791 100644 --- a/drivers/gpu/drm/gma500/psb_intel_sdvo.c +++ b/drivers/gpu/drm/gma500/psb_intel_sdvo.c @@ -2426,7 +2426,6 @@ psb_intel_sdvo_init_ddc_proxy(struct psb_intel_sdvo *sdvo, struct drm_device *dev) { sdvo->ddc.owner = THIS_MODULE; - sdvo->ddc.class = I2C_CLASS_DDC; snprintf(sdvo->ddc.name, I2C_NAME_SIZE, "SDVO DDC proxy"); sdvo->ddc.dev.parent = dev->dev; sdvo->ddc.algo_data = sdvo; diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c index 410bd019bb357257054b92ec14b2df1d7b0aa82a..e6e48651c15c63550bbdb79d9742ba21ba26506b 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c @@ -81,7 +81,6 @@ int hibmc_ddc_create(struct drm_device *drm_dev, struct 
hibmc_connector *connector) { connector->adapter.owner = THIS_MODULE; - connector->adapter.class = I2C_CLASS_DDC; snprintf(connector->adapter.name, I2C_NAME_SIZE, "HIS i2c bit bus"); connector->adapter.dev.parent = drm_dev->dev; i2c_set_adapdata(&connector->adapter, connector); diff --git a/drivers/gpu/drm/i915/display/intel_gmbus.c b/drivers/gpu/drm/i915/display/intel_gmbus.c index 40d7b6f3f4891c8cbe6e6590f90243ca77291581..e9e4dcf345f95722b6c67cb4db802a193c84b0dd 100644 --- a/drivers/gpu/drm/i915/display/intel_gmbus.c +++ b/drivers/gpu/drm/i915/display/intel_gmbus.c @@ -899,7 +899,6 @@ int intel_gmbus_setup(struct drm_i915_private *i915) } bus->adapter.owner = THIS_MODULE; - bus->adapter.class = I2C_CLASS_DDC; snprintf(bus->adapter.name, sizeof(bus->adapter.name), "i915 gmbus %s", gmbus_pin->name); diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index 9218047495fb41980778f8850805e2178677d50e..acc6b6804105102389dc26c3fefce80444d0adad 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -3327,7 +3327,6 @@ intel_sdvo_init_ddc_proxy(struct intel_sdvo_ddc *ddc, ddc->ddc_bus = ddc_bus; ddc->ddc.owner = THIS_MODULE; - ddc->ddc.class = I2C_CLASS_DDC; snprintf(ddc->ddc.name, I2C_NAME_SIZE, "SDVO %c DDC%d", port_name(sdvo->base.port), ddc_bus); ddc->ddc.dev.parent = &pdev->dev; diff --git a/drivers/gpu/drm/loongson/lsdc_i2c.c b/drivers/gpu/drm/loongson/lsdc_i2c.c index 9625d0b1d0b4d81486e62227880b43d3f04c9c63..ce90c25536d240067054bf2c584bd564d465524d 100644 --- a/drivers/gpu/drm/loongson/lsdc_i2c.c +++ b/drivers/gpu/drm/loongson/lsdc_i2c.c @@ -154,7 +154,6 @@ int lsdc_create_i2c_chan(struct drm_device *ddev, adapter = &li2c->adapter; adapter->algo_data = &li2c->bit; adapter->owner = THIS_MODULE; - adapter->class = I2C_CLASS_DDC; adapter->dev.parent = ddev->dev; adapter->nr = -1; diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi_ddc.c b/drivers/gpu/drm/mediatek/mtk_hdmi_ddc.c 
index d675c954befe3cc3ec833ca4ff838371d58727d4..54e46e440e0f0ddcf5d3073fdd3a6d7b15d3b848 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi_ddc.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi_ddc.c @@ -297,7 +297,6 @@ static int mtk_hdmi_ddc_probe(struct platform_device *pdev) strscpy(ddc->adap.name, "mediatek-hdmi-ddc", sizeof(ddc->adap.name)); ddc->adap.owner = THIS_MODULE; - ddc->adap.class = I2C_CLASS_DDC; ddc->adap.algo = &mtk_hdmi_ddc_algorithm; ddc->adap.retries = 3; ddc->adap.dev.of_node = dev->of_node; diff --git a/drivers/gpu/drm/mgag200/mgag200_i2c.c b/drivers/gpu/drm/mgag200/mgag200_i2c.c index 0c48bdf3e7f800a02a00fcbf06b4d5737475e003..423eb302be7eb9def877158d5499b70c9a785905 100644 --- a/drivers/gpu/drm/mgag200/mgag200_i2c.c +++ b/drivers/gpu/drm/mgag200/mgag200_i2c.c @@ -106,7 +106,6 @@ int mgag200_i2c_init(struct mga_device *mdev, struct mga_i2c_chan *i2c) i2c->data = BIT(info->i2c.data_bit); i2c->clock = BIT(info->i2c.clock_bit); i2c->adapter.owner = THIS_MODULE; - i2c->adapter.class = I2C_CLASS_DDC; i2c->adapter.dev.parent = dev->dev; i2c->dev = dev; i2c_set_adapdata(&i2c->adapter, i2c); diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_i2c.c b/drivers/gpu/drm/msm/hdmi/hdmi_i2c.c index de182c00484349c9d097dee02fff50970251a003..7aa500d24240ff3ed6694c469eafc4388c982346 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_i2c.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_i2c.c @@ -249,7 +249,6 @@ struct i2c_adapter *msm_hdmi_i2c_init(struct hdmi *hdmi) i2c->owner = THIS_MODULE; - i2c->class = I2C_CLASS_DDC; snprintf(i2c->name, sizeof(i2c->name), "msm hdmi i2c"); i2c->dev.parent = &hdmi->pdev->dev; i2c->algo = &msm_hdmi_i2c_algorithm; diff --git a/drivers/gpu/drm/nouveau/nouveau_vmm.c b/drivers/gpu/drm/nouveau/nouveau_vmm.c index a6602c0126715635d6328c2fb295d4195b7dd873..3dda885df5b223dc2b637592e50cc7e958b5cbb7 100644 --- a/drivers/gpu/drm/nouveau/nouveau_vmm.c +++ b/drivers/gpu/drm/nouveau/nouveau_vmm.c @@ -108,6 +108,9 @@ nouveau_vma_new(struct nouveau_bo *nvbo, struct nouveau_vmm 
*vmm, } else { ret = nvif_vmm_get(&vmm->vmm, PTES, false, mem->mem.page, 0, mem->mem.size, &tmp); + if (ret) + goto done; + vma->addr = tmp.addr; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp10b.c index e7e8fdf3adab7a0c9454f57b8ad91f4e787bc6da..29682722b0b36b584d4e3a8088d70e74507d78b7 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp10b.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp10b.c @@ -28,19 +28,14 @@ static void gp10b_ltc_init(struct nvkm_ltc *ltc) { struct nvkm_device *device = ltc->subdev.device; - struct iommu_fwspec *spec; + u32 sid; nvkm_wr32(device, 0x17e27c, ltc->ltc_nr); nvkm_wr32(device, 0x17e000, ltc->ltc_nr); nvkm_wr32(device, 0x100800, ltc->ltc_nr); - spec = dev_iommu_fwspec_get(device->dev); - if (spec) { - u32 sid = spec->ids[0] & 0xffff; - - /* stream ID */ + if (tegra_dev_iommu_get_stream_id(device->dev, &sid)) nvkm_wr32(device, 0x160000, sid << 2); - } } static const struct nvkm_ltc_func diff --git a/drivers/gpu/drm/radeon/radeon_i2c.c b/drivers/gpu/drm/radeon/radeon_i2c.c index 314d066e68e9d0dddb240f09526eedce8eac3828..3d174390a8afe7f23b886fbc99273cd9f76505f5 100644 --- a/drivers/gpu/drm/radeon/radeon_i2c.c +++ b/drivers/gpu/drm/radeon/radeon_i2c.c @@ -918,7 +918,6 @@ struct radeon_i2c_chan *radeon_i2c_create(struct drm_device *dev, i2c->rec = *rec; i2c->adapter.owner = THIS_MODULE; - i2c->adapter.class = I2C_CLASS_DDC; i2c->adapter.dev.parent = dev->dev; i2c->dev = dev; i2c_set_adapdata(&i2c->adapter, i2c); diff --git a/drivers/gpu/drm/rockchip/inno_hdmi.c b/drivers/gpu/drm/rockchip/inno_hdmi.c index f6d819803c0e02826c7250d9adf9c618360cb6f8..e6fbe040ccf6a9af8f54b6220dfa6c96d4cd0ee8 100644 --- a/drivers/gpu/drm/rockchip/inno_hdmi.c +++ b/drivers/gpu/drm/rockchip/inno_hdmi.c @@ -792,7 +792,6 @@ static struct i2c_adapter *inno_hdmi_i2c_adapter(struct inno_hdmi *hdmi) init_completion(&i2c->cmp); adap = &i2c->adap; - adap->class = I2C_CLASS_DDC; adap->owner = THIS_MODULE; 
adap->dev.parent = hdmi->dev; adap->dev.of_node = hdmi->dev->of_node; diff --git a/drivers/gpu/drm/rockchip/rk3066_hdmi.c b/drivers/gpu/drm/rockchip/rk3066_hdmi.c index 62e6d8187de767ef5b893b6d7996474312141548..95cd1b49eda8a0c5c436caa9dfe5dca899776314 100644 --- a/drivers/gpu/drm/rockchip/rk3066_hdmi.c +++ b/drivers/gpu/drm/rockchip/rk3066_hdmi.c @@ -715,7 +715,6 @@ static struct i2c_adapter *rk3066_hdmi_i2c_adapter(struct rk3066_hdmi *hdmi) init_completion(&i2c->cmpltn); adap = &i2c->adap; - adap->class = I2C_CLASS_DDC; adap->owner = THIS_MODULE; adap->dev.parent = hdmi->dev; adap->dev.of_node = hdmi->dev->of_node; diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_i2c.c b/drivers/gpu/drm/sun4i/sun4i_hdmi_i2c.c index d1a65a921f5afaf5f3332239a7a9f6be44e8c475..f5f62eb0eecaaa469687c19e921ce56811c5af61 100644 --- a/drivers/gpu/drm/sun4i/sun4i_hdmi_i2c.c +++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_i2c.c @@ -302,7 +302,6 @@ int sun4i_hdmi_i2c_create(struct device *dev, struct sun4i_hdmi *hdmi) return -ENOMEM; adap->owner = THIS_MODULE; - adap->class = I2C_CLASS_DDC; adap->algo = &sun4i_hdmi_i2c_algorithm; strscpy(adap->name, "sun4i_hdmi_i2c adapter", sizeof(adap->name)); i2c_set_adapdata(adap, hdmi); diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index f5187b384ae9ac8eedede8e6a0d4d56eb8af1670..4130945052ed2a523d50e1d39d028ca3741c6605 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -195,7 +195,7 @@ int ttm_device_init(struct ttm_device *bdev, const struct ttm_device_funcs *func bool use_dma_alloc, bool use_dma32) { struct ttm_global *glob = &ttm_glob; - int ret; + int ret, nid; if (WARN_ON(vma_manager == NULL)) return -EINVAL; @@ -215,7 +215,12 @@ int ttm_device_init(struct ttm_device *bdev, const struct ttm_device_funcs *func ttm_sys_man_init(bdev); - ttm_pool_init(&bdev->pool, dev, dev_to_node(dev), use_dma_alloc, use_dma32); + if (dev) + nid = dev_to_node(dev); + else + nid = NUMA_NO_NODE; + + 
ttm_pool_init(&bdev->pool, dev, nid, use_dma_alloc, use_dma32); bdev->vma_manager = vma_manager; spin_lock_init(&bdev->lru_lock); diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index 1cced50d8d8c9dadcb40f7ebe75cf7474170772b..e36ae1f0d8859fc82f2e0a9ed06f8e7ee6387372 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -47,7 +47,7 @@ config DRM_XE config DRM_XE_DISPLAY bool "Enable display support" - depends on DRM_XE && EXPERT && DRM_XE=m + depends on DRM_XE && DRM_XE=m select FB_IOMEM_HELPERS select I2C select I2C_ALGOBIT diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 53bd2a8ba1ae5cea2535c59047d028f18bec8e65..efcf0ab7a1a69d35271b5655c5a746e992459b02 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -17,7 +17,6 @@ subdir-ccflags-y += $(call cc-option, -Wunused-const-variable) subdir-ccflags-y += $(call cc-option, -Wpacked-not-aligned) subdir-ccflags-y += $(call cc-option, -Wformat-overflow) subdir-ccflags-y += $(call cc-option, -Wformat-truncation) -subdir-ccflags-y += $(call cc-option, -Wstringop-overflow) subdir-ccflags-y += $(call cc-option, -Wstringop-truncation) # The following turn off the warnings enabled by -Wextra ifeq ($(findstring 2, $(KBUILD_EXTRA_WARN)),) diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index 412b2e7ce40cb3ea38b6f5c76fb293009c10c3a2..3436fd9cf2b2738446608990a5f5be1a4f33fb2e 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -125,14 +125,13 @@ static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile, bo = xe_bo_create_user(xe, NULL, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC, ttm_bo_type_device, bo_flags); - - xe_bo_lock(bo, false); - if (IS_ERR(bo)) { KUNIT_FAIL(test, "Failed to create bo.\n"); return; } + xe_bo_lock(bo, false); + kunit_info(test, "Verifying that CCS data is cleared on creation.\n"); ret = ccs_test_migrate(tile, bo, false, 0ULL, 
0xdeadbeefdeadbeefULL, test); diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index 7a32faa2f68880dcd4cb5b217ff9986153b990cb..a6523df0f1d39fbe7f0354d404f95886a3d56424 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -331,7 +331,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) xe_res_first_sg(xe_bo_sg(pt), 0, pt->size, &src_it); emit_pte(m, bb, NUM_KERNEL_PDE - 1, xe_bo_is_vram(pt), false, - &src_it, XE_PAGE_SIZE, pt); + &src_it, XE_PAGE_SIZE, pt->ttm.resource); run_sanity_job(m, xe, bb, bb->len, "Writing PTE for our fake PT", test); diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 8e4a3b1f6b938e5a76b8fc6640058ca63a30c709..0b0e262e2166d69da1063915fa4c6eeedfd38bd6 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -125,9 +125,9 @@ static struct xe_mem_region *res_to_mem_region(struct ttm_resource *res) static void try_add_system(struct xe_device *xe, struct xe_bo *bo, u32 bo_flags, u32 *c) { - xe_assert(xe, *c < ARRAY_SIZE(bo->placements)); - if (bo_flags & XE_BO_CREATE_SYSTEM_BIT) { + xe_assert(xe, *c < ARRAY_SIZE(bo->placements)); + bo->placements[*c] = (struct ttm_place) { .mem_type = XE_PL_TT, }; @@ -145,6 +145,8 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo, struct xe_mem_region *vram; u64 io_size; + xe_assert(xe, *c < ARRAY_SIZE(bo->placements)); + vram = to_xe_ttm_vram_mgr(ttm_manager_type(&xe->ttm, mem_type))->vram; xe_assert(xe, vram && vram->usable_size); io_size = vram->io_size; @@ -175,8 +177,6 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo, static void try_add_vram(struct xe_device *xe, struct xe_bo *bo, u32 bo_flags, u32 *c) { - xe_assert(xe, *c < ARRAY_SIZE(bo->placements)); - if (bo->props.preferred_gt == XE_GT1) { if (bo_flags & XE_BO_CREATE_VRAM1_BIT) add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c); @@ -193,9 +193,9 @@ static void 
try_add_vram(struct xe_device *xe, struct xe_bo *bo, static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo, u32 bo_flags, u32 *c) { - xe_assert(xe, *c < ARRAY_SIZE(bo->placements)); - if (bo_flags & XE_BO_CREATE_STOLEN_BIT) { + xe_assert(xe, *c < ARRAY_SIZE(bo->placements)); + bo->placements[*c] = (struct ttm_place) { .mem_type = XE_PL_STOLEN, .flags = bo_flags & (XE_BO_CREATE_PINNED_BIT | @@ -442,7 +442,7 @@ static int xe_ttm_io_mem_reserve(struct ttm_device *bdev, if (vram->mapping && mem->placement & TTM_PL_FLAG_CONTIGUOUS) - mem->bus.addr = (u8 *)vram->mapping + + mem->bus.addr = (u8 __force *)vram->mapping + mem->bus.offset; mem->bus.offset += vram->io_start; @@ -734,7 +734,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, /* Create a new VMAP once kernel BO back in VRAM */ if (!ret && resource_is_vram(new_mem)) { struct xe_mem_region *vram = res_to_mem_region(new_mem); - void *new_addr = vram->mapping + + void __iomem *new_addr = vram->mapping + (new_mem->start << PAGE_SHIFT); if (XE_WARN_ON(new_mem->start == XE_BO_INVALID_OFFSET)) { diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index d9ae77fe7382ddf9997858995fe255108f7c5944..b8d8da5466708c6903ccb3ade3852a7ac9911235 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -484,7 +484,7 @@ int xe_device_probe(struct xe_device *xe) err = xe_device_set_has_flat_ccs(xe); if (err) - return err; + goto err_irq_shutdown; err = xe_mmio_probe_vram(xe); if (err) diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index c45ef17b347323801d397a964e53aa3fc5b060f4..5dc9127a20293e1ebb56c3684e2fdb7e6f425b43 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -97,7 +97,7 @@ struct xe_mem_region { */ resource_size_t actual_physical_size; /** @mapping: pointer to VRAM mappable space */ - void *__iomem mapping; + void __iomem *mapping; }; /** @@ -146,7 +146,7 
@@ struct xe_tile { size_t size; /** @regs: pointer to tile's MMIO space (starting with registers) */ - void *regs; + void __iomem *regs; } mmio; /** @@ -159,7 +159,7 @@ struct xe_tile { size_t size; /** @regs: pointer to tile's additional MMIO-extension space */ - void *regs; + void __iomem *regs; } mmio_ext; /** @mem: memory management info for tile */ @@ -301,7 +301,7 @@ struct xe_device { /** @size: size of MMIO space for device */ size_t size; /** @regs: pointer to MMIO space for device */ - void *regs; + void __iomem *regs; } mmio; /** @mem: memory info for device */ diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index d30c0d0689bcc7d4ae55cdd7fc93b116826160e6..b853feed9ccc15eefab7f0ccdf070096521e6015 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -115,7 +115,7 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) struct xe_sched_job *job; struct dma_fence *rebind_fence; struct xe_vm *vm; - bool write_locked; + bool write_locked, skip_retry = false; ktime_t end = 0; int err = 0; @@ -227,7 +227,8 @@ retry: } if (xe_exec_queue_is_lr(q) && xe_exec_queue_ring_full(q)) { - err = -EWOULDBLOCK; + err = -EWOULDBLOCK; /* Aliased to -EAGAIN */ + skip_retry = true; goto err_exec; } @@ -337,7 +338,7 @@ err_unlock_list: up_write(&vm->lock); else up_read(&vm->lock); - if (err == -EAGAIN) + if (err == -EAGAIN && !skip_retry) goto retry; err_syncs: for (i = 0; i < num_syncs; i++) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 44fe8097b7cdac8d5c89a3bf00168cf3b8343ca7..bcfc4127c7c59f0fffc8e40df70a5b1c8222495f 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -67,6 +67,11 @@ static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe, q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us; q->sched_props.preempt_timeout_us = hwe->eclass->sched_props.preempt_timeout_us; + if (q->flags & 
EXEC_QUEUE_FLAG_KERNEL && + q->flags & EXEC_QUEUE_FLAG_HIGH_PRIORITY) + q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_KERNEL; + else + q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_NORMAL; if (xe_exec_queue_is_parallel(q)) { q->parallel.composite_fence_ctx = dma_fence_context_alloc(1); diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 3d7e704ec3d9f33b9c0e47867bb58135ea01df91..8d4b7feb8c306b8a406a46f74c5cad2a430bdef3 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -52,8 +52,6 @@ struct xe_exec_queue { struct xe_vm *vm; /** @class: class of this exec queue */ enum xe_engine_class class; - /** @priority: priority of this exec queue */ - enum xe_exec_queue_priority priority; /** * @logical_mask: logical mask of where job submitted to exec queue can run */ @@ -84,6 +82,8 @@ struct xe_exec_queue { #define EXEC_QUEUE_FLAG_VM BIT(4) /* child of VM queue for multi-tile VM jobs */ #define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD BIT(5) +/* kernel exec_queue only, set priority to highest level */ +#define EXEC_QUEUE_FLAG_HIGH_PRIORITY BIT(6) /** * @flags: flags for this exec queue, should statically setup aside from ban @@ -142,6 +142,8 @@ struct xe_exec_queue { u32 timeslice_us; /** @preempt_timeout_us: preemption timeout in micro-seconds */ u32 preempt_timeout_us; + /** @priority: priority of this exec queue */ + enum xe_exec_queue_priority priority; } sched_props; /** @compute: compute exec queue state */ diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c index 3adfa6686e7cf9eb2763bccc28b7a0a382dd4834..e5b0f4ecdbe8261ee5c3fa9530a30dc2fd46c14b 100644 --- a/drivers/gpu/drm/xe/xe_gt_freq.c +++ b/drivers/gpu/drm/xe/xe_gt_freq.c @@ -196,6 +196,9 @@ void xe_gt_freq_init(struct xe_gt *gt) struct xe_device *xe = gt_to_xe(gt); int err; + if (xe->info.skip_guc_pc) + return; + gt->freq = kobject_create_and_add("freq0", gt->sysfs); if (!gt->freq) { 
drm_warn(&xe->drm, "failed to add freq0 directory to %s\n", diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 482cb0df9f15bc28d9f5c2a0ea194319f1b2a21a..0a61390c64a7b7100113641f2e073f4ce58d358e 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -60,7 +60,12 @@ static u32 guc_ctl_debug_flags(struct xe_guc *guc) static u32 guc_ctl_feature_flags(struct xe_guc *guc) { - return GUC_CTL_ENABLE_SLPC; + u32 flags = 0; + + if (!guc_to_xe(guc)->info.skip_guc_pc) + flags |= GUC_CTL_ENABLE_SLPC; + + return flags; } static u32 guc_ctl_log_params_flags(struct xe_guc *guc) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 21ac68e3246f86f1e16d05880da8aa9315769ecb..54ffcfcdd41f9ce3c590f5814fcbe3d3535946ac 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -421,7 +421,7 @@ static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q) { struct exec_queue_policy policy; struct xe_device *xe = guc_to_xe(guc); - enum xe_exec_queue_priority prio = q->priority; + enum xe_exec_queue_priority prio = q->sched_props.priority; u32 timeslice_us = q->sched_props.timeslice_us; u32 preempt_timeout_us = q->sched_props.preempt_timeout_us; @@ -1231,7 +1231,6 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) err = xe_sched_entity_init(&ge->entity, sched); if (err) goto err_sched; - q->priority = XE_EXEC_QUEUE_PRIORITY_NORMAL; if (xe_exec_queue_is_lr(q)) INIT_WORK(&q->guc->lr_tdr, xe_guc_exec_queue_lr_cleanup); @@ -1301,15 +1300,15 @@ static int guc_exec_queue_set_priority(struct xe_exec_queue *q, { struct xe_sched_msg *msg; - if (q->priority == priority || exec_queue_killed_or_banned(q)) + if (q->sched_props.priority == priority || exec_queue_killed_or_banned(q)) return 0; msg = kmalloc(sizeof(*msg), GFP_KERNEL); if (!msg) return -ENOMEM; + q->sched_props.priority = priority; guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); - q->priority = priority; 
return 0; } diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index adf1dab5eba253297fb8b4ae4c2c5b5f15b2ec7a..e05e9e7282b68abdcab839a9134efd09e60750f2 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -62,6 +62,8 @@ struct xe_migrate { * out of the pt_bo. */ struct drm_suballoc_manager vm_update_sa; + /** @min_chunk_size: For dgfx, Minimum chunk size */ + u64 min_chunk_size; }; #define MAX_PREEMPTDISABLE_TRANSFER SZ_8M /* Around 1ms. */ @@ -344,7 +346,8 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) m->q = xe_exec_queue_create(xe, vm, logical_mask, 1, hwe, EXEC_QUEUE_FLAG_KERNEL | - EXEC_QUEUE_FLAG_PERMANENT); + EXEC_QUEUE_FLAG_PERMANENT | + EXEC_QUEUE_FLAG_HIGH_PRIORITY); } else { m->q = xe_exec_queue_create_class(xe, primary_gt, vm, XE_ENGINE_CLASS_COPY, @@ -355,8 +358,6 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) xe_vm_close_and_put(vm); return ERR_CAST(m->q); } - if (xe->info.has_usm) - m->q->priority = XE_EXEC_QUEUE_PRIORITY_KERNEL; mutex_init(&m->job_mutex); @@ -364,6 +365,19 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) if (err) return ERR_PTR(err); + if (IS_DGFX(xe)) { + if (xe_device_has_flat_ccs(xe)) + /* min chunk size corresponds to 4K of CCS Metadata */ + m->min_chunk_size = SZ_4K * SZ_64K / + xe_device_ccs_bytes(xe, SZ_64K); + else + /* Somewhat arbitrary to avoid a huge amount of blits */ + m->min_chunk_size = SZ_64K; + m->min_chunk_size = roundup_pow_of_two(m->min_chunk_size); + drm_dbg(&xe->drm, "Migrate min chunk size is 0x%08llx\n", + (unsigned long long)m->min_chunk_size); + } + return m; } @@ -375,16 +389,35 @@ static u64 max_mem_transfer_per_pass(struct xe_device *xe) return MAX_PREEMPTDISABLE_TRANSFER; } -static u64 xe_migrate_res_sizes(struct xe_device *xe, struct xe_res_cursor *cur) +static u64 xe_migrate_res_sizes(struct xe_migrate *m, struct xe_res_cursor *cur) { - /* - * For VRAM we use identity mapped pages so we are limited to 
current - * cursor size. For system we program the pages ourselves so we have no - * such limitation. - */ - return min_t(u64, max_mem_transfer_per_pass(xe), - mem_type_is_vram(cur->mem_type) ? cur->size : - cur->remaining); + struct xe_device *xe = tile_to_xe(m->tile); + u64 size = min_t(u64, max_mem_transfer_per_pass(xe), cur->remaining); + + if (mem_type_is_vram(cur->mem_type)) { + /* + * VRAM we want to blit in chunks with sizes aligned to + * min_chunk_size in order for the offset to CCS metadata to be + * page-aligned. If it's the last chunk it may be smaller. + * + * Another constraint is that we need to limit the blit to + * the VRAM block size, unless size is smaller than + * min_chunk_size. + */ + u64 chunk = max_t(u64, cur->size, m->min_chunk_size); + + size = min_t(u64, size, chunk); + if (size > m->min_chunk_size) + size = round_down(size, m->min_chunk_size); + } + + return size; +} + +static bool xe_migrate_allow_identity(u64 size, const struct xe_res_cursor *cur) +{ + /* If the chunk is not fragmented, allow identity map. */ + return cur->size >= size; } static u32 pte_update_size(struct xe_migrate *m, @@ -397,7 +430,12 @@ static u32 pte_update_size(struct xe_migrate *m, u32 cmds = 0; *L0_pt = pt_ofs; - if (!is_vram) { + if (is_vram && xe_migrate_allow_identity(*L0, cur)) { + /* Offset into identity map. */ + *L0_ofs = xe_migrate_vram_ofs(tile_to_xe(m->tile), + cur->start + vram_region_gpu_offset(res)); + cmds += cmd_size; + } else { /* Clip L0 to available size */ u64 size = min(*L0, (u64)avail_pts * SZ_2M); u64 num_4k_pages = DIV_ROUND_UP(size, XE_PAGE_SIZE); @@ -413,11 +451,6 @@ static u32 pte_update_size(struct xe_migrate *m, /* Each chunk has a single blit command */ cmds += cmd_size; - } else { - /* Offset into identity map. 
*/ - *L0_ofs = xe_migrate_vram_ofs(tile_to_xe(m->tile), - cur->start + vram_region_gpu_offset(res)); - cmds += cmd_size; } return cmds; @@ -427,10 +460,10 @@ static void emit_pte(struct xe_migrate *m, struct xe_bb *bb, u32 at_pt, bool is_vram, bool is_comp_pte, struct xe_res_cursor *cur, - u32 size, struct xe_bo *bo) + u32 size, struct ttm_resource *res) { struct xe_device *xe = tile_to_xe(m->tile); - + struct xe_vm *vm = m->q->vm; u16 pat_index; u32 ptes; u64 ofs = at_pt * XE_PAGE_SIZE; @@ -443,13 +476,6 @@ static void emit_pte(struct xe_migrate *m, else pat_index = xe->pat.idx[XE_CACHE_WB]; - /* - * FIXME: Emitting VRAM PTEs to L0 PTs is forbidden. Currently - * we're only emitting VRAM PTEs during sanity tests, so when - * that's moved to a Kunit test, we should condition VRAM PTEs - * on running tests. - */ - ptes = DIV_ROUND_UP(size, XE_PAGE_SIZE); while (ptes) { @@ -469,20 +495,22 @@ static void emit_pte(struct xe_migrate *m, addr = xe_res_dma(cur) & PAGE_MASK; if (is_vram) { - /* Is this a 64K PTE entry? 
*/ - if ((m->q->vm->flags & XE_VM_FLAG_64K) && - !(cur_ofs & (16 * 8 - 1))) { - xe_tile_assert(m->tile, IS_ALIGNED(addr, SZ_64K)); + if (vm->flags & XE_VM_FLAG_64K) { + u64 va = cur_ofs * XE_PAGE_SIZE / 8; + + xe_assert(xe, (va & (SZ_64K - 1)) == + (addr & (SZ_64K - 1))); + flags |= XE_PTE_PS64; } - addr += vram_region_gpu_offset(bo->ttm.resource); + addr += vram_region_gpu_offset(res); devmem = true; } - addr = m->q->vm->pt_ops->pte_encode_addr(m->tile->xe, - addr, pat_index, - 0, devmem, flags); + addr = vm->pt_ops->pte_encode_addr(m->tile->xe, + addr, pat_index, + 0, devmem, flags); bb->cs[bb->len++] = lower_32_bits(addr); bb->cs[bb->len++] = upper_32_bits(addr); @@ -694,8 +722,8 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, bool usm = xe->info.has_usm; u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE; - src_L0 = xe_migrate_res_sizes(xe, &src_it); - dst_L0 = xe_migrate_res_sizes(xe, &dst_it); + src_L0 = xe_migrate_res_sizes(m, &src_it); + dst_L0 = xe_migrate_res_sizes(m, &dst_it); drm_dbg(&xe->drm, "Pass %u, sizes: %llu & %llu\n", pass++, src_L0, dst_L0); @@ -716,6 +744,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, &ccs_ofs, &ccs_pt, 0, 2 * avail_pts, avail_pts); + xe_assert(xe, IS_ALIGNED(ccs_it.start, PAGE_SIZE)); } /* Add copy commands size here */ @@ -728,20 +757,20 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, goto err_sync; } - if (!src_is_vram) - emit_pte(m, bb, src_L0_pt, src_is_vram, true, &src_it, src_L0, - src_bo); - else + if (src_is_vram && xe_migrate_allow_identity(src_L0, &src_it)) xe_res_next(&src_it, src_L0); - - if (!dst_is_vram) - emit_pte(m, bb, dst_L0_pt, dst_is_vram, true, &dst_it, src_L0, - dst_bo); else + emit_pte(m, bb, src_L0_pt, src_is_vram, true, &src_it, src_L0, + src); + + if (dst_is_vram && xe_migrate_allow_identity(src_L0, &dst_it)) xe_res_next(&dst_it, src_L0); + else + emit_pte(m, bb, dst_L0_pt, dst_is_vram, true, &dst_it, src_L0, + dst); if (copy_system_ccs) 
- emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src_bo); + emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src); bb->cs[bb->len++] = MI_BATCH_BUFFER_END; update_idx = bb->len; @@ -950,7 +979,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, bool usm = xe->info.has_usm; u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE; - clear_L0 = xe_migrate_res_sizes(xe, &src_it); + clear_L0 = xe_migrate_res_sizes(m, &src_it); drm_dbg(&xe->drm, "Pass %u, size: %llu\n", pass++, clear_L0); @@ -977,12 +1006,12 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, size -= clear_L0; /* Preemption is enabled again by the ring ops. */ - if (!clear_vram) { - emit_pte(m, bb, clear_L0_pt, clear_vram, true, &src_it, clear_L0, - bo); - } else { + if (clear_vram && xe_migrate_allow_identity(clear_L0, &src_it)) xe_res_next(&src_it, clear_L0); - } + else + emit_pte(m, bb, clear_L0_pt, clear_vram, true, &src_it, clear_L0, + dst); + bb->cs[bb->len++] = MI_BATCH_BUFFER_END; update_idx = bb->len; diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index f660cfb79f504e264f9a3ced4a5f5544fada74e0..c8c5d74b6e9041ec53c38ba81d184b83037427ab 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -303,7 +303,7 @@ void xe_mmio_probe_tiles(struct xe_device *xe) u8 id, tile_count = xe->info.tile_count; struct xe_gt *gt = xe_root_mmio_gt(xe); struct xe_tile *tile; - void *regs; + void __iomem *regs; u32 mtcfg; if (tile_count == 1) diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index d2b00d0bf1e203c8b9c9322bcf8a489b1409821b..e5d7d5e2bec129937317f14f6b0063308a19bf58 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -31,7 +31,7 @@ struct xe_ttm_stolen_mgr { /* GPU base offset */ resource_size_t stolen_base; - void *__iomem mapping; + void __iomem *mapping; }; static inline struct xe_ttm_stolen_mgr * @@ -275,7 
+275,7 @@ static int __xe_ttm_stolen_io_mem_reserve_bar2(struct xe_device *xe, drm_WARN_ON(&xe->drm, !(mem->placement & TTM_PL_FLAG_CONTIGUOUS)); if (mem->placement & TTM_PL_FLAG_CONTIGUOUS && mgr->mapping) - mem->bus.addr = (u8 *)mgr->mapping + mem->bus.offset; + mem->bus.addr = (u8 __force *)mgr->mapping + mem->bus.offset; mem->bus.offset += mgr->io_base; mem->bus.is_iomem = true; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 0cfe7289b97efddd3d3b1177b6e91ebafd22fdc9..10b6995fbf294690a36234dc3c36bf741245b7a9 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -335,13 +335,13 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) down_write(&vm->lock); err = drm_gpuvm_exec_lock(&vm_exec); if (err) - return err; + goto out_up_write; pfence = xe_preempt_fence_create(q, q->compute.context, ++q->compute.seqno); if (!pfence) { err = -ENOMEM; - goto out_unlock; + goto out_fini; } list_add(&q->compute.link, &vm->preempt.exec_queues); @@ -364,8 +364,9 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) up_read(&vm->userptr.notifier_lock); -out_unlock: +out_fini: drm_exec_fini(exec); +out_up_write: up_write(&vm->lock); return err; @@ -2063,9 +2064,11 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, if (err) return ERR_PTR(err); - vm_bo = drm_gpuvm_bo_find(&vm->gpuvm, obj); - if (!vm_bo) - break; + vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj); + if (IS_ERR(vm_bo)) { + xe_bo_unlock(bo); + return ERR_CAST(vm_bo); + } ops = drm_gpuvm_bo_unmap_ops_create(vm_bo); drm_gpuvm_bo_put(vm_bo); diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index 4372f5d146ab22edaf948622648df24e741ec2f6..0285a74363b3d11e35b2e29aa86e1861e1900f00 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -488,7 +488,7 @@ void hv_setup_dma_ops(struct device *dev, bool coherent) * Hyper-V does not offer a vIOMMU in the guest * VM, so pass 0/NULL for the IOMMU settings 
*/ - arch_setup_dma_ops(dev, 0, 0, NULL, coherent); + arch_setup_dma_ops(dev, 0, 0, coherent); } EXPORT_SYMBOL_GPL(hv_setup_dma_ops); diff --git a/drivers/hwmon/npcm750-pwm-fan.c b/drivers/hwmon/npcm750-pwm-fan.c index d9733da8ea34f558e10ec8e27848d22483856232..904816abb7c468c1476d0c4581cea68d1d24a641 100644 --- a/drivers/hwmon/npcm750-pwm-fan.c +++ b/drivers/hwmon/npcm750-pwm-fan.c @@ -195,6 +195,7 @@ struct npcm7xx_cooling_device { struct npcm7xx_pwm_fan_data { void __iomem *pwm_base; void __iomem *fan_base; + int pwm_modules; unsigned long pwm_clk_freq; unsigned long fan_clk_freq; struct clk *pwm_clk; @@ -710,7 +711,7 @@ static u32 npcm7xx_pwm_init(struct npcm7xx_pwm_fan_data *data) /* Setting PWM Prescale Register value register to both modules */ prescale_val |= (prescale_val << NPCM7XX_PWM_PRESCALE_SHIFT_CH01); - for (m = 0; m < NPCM7XX_PWM_MAX_MODULES ; m++) { + for (m = 0; m < data->pwm_modules; m++) { iowrite32(prescale_val, NPCM7XX_PWM_REG_PR(data->pwm_base, m)); iowrite32(NPCM7XX_PWM_PRESCALE2_DEFAULT, NPCM7XX_PWM_REG_CSR(data->pwm_base, m)); @@ -946,6 +947,8 @@ static int npcm7xx_pwm_fan_probe(struct platform_device *pdev) if (!data->info) return -EINVAL; + data->pwm_modules = data->info->pwm_max_channel / NPCM7XX_PWM_MAX_CHN_NUM_IN_A_MODULE; + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "pwm"); if (!res) { dev_err(dev, "pwm resource not found\n"); @@ -983,7 +986,7 @@ static int npcm7xx_pwm_fan_probe(struct platform_device *pdev) output_freq = npcm7xx_pwm_init(data); npcm7xx_fan_init(data); - for (cnt = 0; cnt < NPCM7XX_PWM_MAX_MODULES ; cnt++) + for (cnt = 0; cnt < data->pwm_modules; cnt++) mutex_init(&data->pwm_lock[cnt]); for (i = 0; i < NPCM7XX_FAN_MAX_MODULE; i++) { diff --git a/drivers/i2c/busses/i2c-ali1535.c b/drivers/i2c/busses/i2c-ali1535.c index ee83c4581bce059205515db11621d915e3023836..461eb23f9d476786bebadbbb79c888f6aa0a057f 100644 --- a/drivers/i2c/busses/i2c-ali1535.c +++ b/drivers/i2c/busses/i2c-ali1535.c @@ -477,7 +477,7 @@ 
static const struct i2c_algorithm smbus_algorithm = { static struct i2c_adapter ali1535_adapter = { .owner = THIS_MODULE, - .class = I2C_CLASS_HWMON | I2C_CLASS_SPD, + .class = I2C_CLASS_HWMON, .algo = &smbus_algorithm, }; diff --git a/drivers/i2c/busses/i2c-ali1563.c b/drivers/i2c/busses/i2c-ali1563.c index 55a9e93fbfeb56a55d36795e3bca21c101a12f03..307fb0666ecb2f296af23926fe3d5728d78fb74b 100644 --- a/drivers/i2c/busses/i2c-ali1563.c +++ b/drivers/i2c/busses/i2c-ali1563.c @@ -390,7 +390,7 @@ static const struct i2c_algorithm ali1563_algorithm = { static struct i2c_adapter ali1563_adapter = { .owner = THIS_MODULE, - .class = I2C_CLASS_HWMON | I2C_CLASS_SPD, + .class = I2C_CLASS_HWMON, .algo = &ali1563_algorithm, }; diff --git a/drivers/i2c/busses/i2c-ali15x3.c b/drivers/i2c/busses/i2c-ali15x3.c index 0231c5be6354f4ff40b2dc0c8ab57bd191511e3c..d2fa30deb054c7fafe0d2e1749ca53e684a1fa32 100644 --- a/drivers/i2c/busses/i2c-ali15x3.c +++ b/drivers/i2c/busses/i2c-ali15x3.c @@ -461,7 +461,7 @@ static const struct i2c_algorithm smbus_algorithm = { static struct i2c_adapter ali15x3_adapter = { .owner = THIS_MODULE, - .class = I2C_CLASS_HWMON | I2C_CLASS_SPD, + .class = I2C_CLASS_HWMON, .algo = &smbus_algorithm, }; diff --git a/drivers/i2c/busses/i2c-amd756.c b/drivers/i2c/busses/i2c-amd756.c index ef1307a258e952704ba4c048ee2e0eefa42e7989..208310db906dfba418a95d3ac3b38234d0712c77 100644 --- a/drivers/i2c/busses/i2c-amd756.c +++ b/drivers/i2c/busses/i2c-amd756.c @@ -285,7 +285,7 @@ static const struct i2c_algorithm smbus_algorithm = { struct i2c_adapter amd756_smbus = { .owner = THIS_MODULE, - .class = I2C_CLASS_HWMON | I2C_CLASS_SPD, + .class = I2C_CLASS_HWMON, .algo = &smbus_algorithm, }; diff --git a/drivers/i2c/busses/i2c-amd8111.c b/drivers/i2c/busses/i2c-amd8111.c index 1ed7e945bb6d107eeed7f5e90210c99019b3c712..42a9b1221065f6c2d05667de38cb1b41967bd518 100644 --- a/drivers/i2c/busses/i2c-amd8111.c +++ b/drivers/i2c/busses/i2c-amd8111.c @@ -449,7 +449,7 @@ static int 
amd8111_probe(struct pci_dev *dev, const struct pci_device_id *id) smbus->adapter.owner = THIS_MODULE; snprintf(smbus->adapter.name, sizeof(smbus->adapter.name), "SMBus2 AMD8111 adapter at %04x", smbus->base); - smbus->adapter.class = I2C_CLASS_HWMON | I2C_CLASS_SPD; + smbus->adapter.class = I2C_CLASS_HWMON; smbus->adapter.algo = &smbus_algorithm; smbus->adapter.algo_data = smbus; diff --git a/drivers/i2c/busses/i2c-cpm.c b/drivers/i2c/busses/i2c-cpm.c index 9a664abf734d6627889a33f7f0729c384a8322e0..4404b4aac6765b15b27840fd0a43ee8551f43aed 100644 --- a/drivers/i2c/busses/i2c-cpm.c +++ b/drivers/i2c/busses/i2c-cpm.c @@ -658,7 +658,7 @@ static int cpm_i2c_probe(struct platform_device *ofdev) /* register new adapter to i2c module... */ data = of_get_property(ofdev->dev.of_node, "linux,i2c-index", &len); - cpm->adap.nr = (data && len == 4) ? be32_to_cpup(data) : -1; + cpm->adap.nr = (data && len == 4) ? *data : -1; result = i2c_add_numbered_adapter(&cpm->adap); if (result < 0) diff --git a/drivers/i2c/busses/i2c-elektor.c b/drivers/i2c/busses/i2c-elektor.c index b0f50dce9d0fe4b4ae56c2cdc4ade814560c6014..cfe8665cacd270d9e8599106a06d7a0e9237cfae 100644 --- a/drivers/i2c/busses/i2c-elektor.c +++ b/drivers/i2c/busses/i2c-elektor.c @@ -188,7 +188,7 @@ static struct i2c_algo_pcf_data pcf_isa_data = { static struct i2c_adapter pcf_isa_ops = { .owner = THIS_MODULE, - .class = I2C_CLASS_HWMON | I2C_CLASS_SPD, + .class = I2C_CLASS_HWMON, .algo_data = &pcf_isa_data, .name = "i2c-elektor", }; diff --git a/drivers/i2c/busses/i2c-gpio.c b/drivers/i2c/busses/i2c-gpio.c index fb35a75fe0e32f54bac273b4a24404c16bf7e663..4f1411b1a775452801dafaed734fab0be7d67ce8 100644 --- a/drivers/i2c/busses/i2c-gpio.c +++ b/drivers/i2c/busses/i2c-gpio.c @@ -25,7 +25,6 @@ struct i2c_gpio_private_data { struct i2c_algo_bit_data bit_data; struct i2c_gpio_platform_data pdata; #ifdef CONFIG_I2C_GPIO_FAULT_INJECTOR - struct dentry *debug_dir; /* these must be protected by bus lock */ struct completion 
scl_irq_completion; u64 scl_irq_data; @@ -72,7 +71,6 @@ static int i2c_gpio_getscl(void *data) } #ifdef CONFIG_I2C_GPIO_FAULT_INJECTOR -static struct dentry *i2c_gpio_debug_dir; #define setsda(bd, val) ((bd)->setsda((bd)->data, val)) #define setscl(bd, val) ((bd)->setscl((bd)->data, val)) @@ -258,41 +256,23 @@ static void i2c_gpio_fault_injector_init(struct platform_device *pdev) { struct i2c_gpio_private_data *priv = platform_get_drvdata(pdev); - /* - * If there will be a debugfs-dir per i2c adapter somewhen, put the - * 'fault-injector' dir there. Until then, we have a global dir with - * all adapters as subdirs. - */ - if (!i2c_gpio_debug_dir) - i2c_gpio_debug_dir = debugfs_create_dir("i2c-fault-injector", NULL); - - priv->debug_dir = debugfs_create_dir(pdev->name, i2c_gpio_debug_dir); - init_completion(&priv->scl_irq_completion); - debugfs_create_file_unsafe("incomplete_address_phase", 0200, priv->debug_dir, + debugfs_create_file_unsafe("incomplete_address_phase", 0200, priv->adap.debugfs, priv, &fops_incomplete_addr_phase); - debugfs_create_file_unsafe("incomplete_write_byte", 0200, priv->debug_dir, + debugfs_create_file_unsafe("incomplete_write_byte", 0200, priv->adap.debugfs, priv, &fops_incomplete_write_byte); if (priv->bit_data.getscl) { - debugfs_create_file_unsafe("inject_panic", 0200, priv->debug_dir, + debugfs_create_file_unsafe("inject_panic", 0200, priv->adap.debugfs, priv, &fops_inject_panic); - debugfs_create_file_unsafe("lose_arbitration", 0200, priv->debug_dir, + debugfs_create_file_unsafe("lose_arbitration", 0200, priv->adap.debugfs, priv, &fops_lose_arbitration); } - debugfs_create_file_unsafe("scl", 0600, priv->debug_dir, priv, &fops_scl); - debugfs_create_file_unsafe("sda", 0600, priv->debug_dir, priv, &fops_sda); -} - -static void i2c_gpio_fault_injector_exit(struct platform_device *pdev) -{ - struct i2c_gpio_private_data *priv = platform_get_drvdata(pdev); - - debugfs_remove_recursive(priv->debug_dir); + debugfs_create_file_unsafe("scl", 
0600, priv->adap.debugfs, priv, &fops_scl); + debugfs_create_file_unsafe("sda", 0600, priv->adap.debugfs, priv, &fops_sda); } #else static inline void i2c_gpio_fault_injector_init(struct platform_device *pdev) {} -static inline void i2c_gpio_fault_injector_exit(struct platform_device *pdev) {} #endif /* CONFIG_I2C_GPIO_FAULT_INJECTOR*/ /* Get i2c-gpio properties from DT or ACPI table */ @@ -444,7 +424,7 @@ static int i2c_gpio_probe(struct platform_device *pdev) snprintf(adap->name, sizeof(adap->name), "i2c-gpio%d", pdev->id); adap->algo_data = bit_data; - adap->class = I2C_CLASS_HWMON | I2C_CLASS_SPD; + adap->class = I2C_CLASS_HWMON; adap->dev.parent = dev; device_set_node(&adap->dev, fwnode); @@ -475,8 +455,6 @@ static void i2c_gpio_remove(struct platform_device *pdev) struct i2c_gpio_private_data *priv; struct i2c_adapter *adap; - i2c_gpio_fault_injector_exit(pdev); - priv = platform_get_drvdata(pdev); adap = &priv->adap; diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 070999139c6dcbe24ddde994543cf1ec9df74e82..3932e8d96a17173fa3b4f7ad90ebcbb786e99370 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -1230,8 +1230,10 @@ static const struct { * Additional individual entries were added after verification. 
*/ { "Latitude 5480", 0x29 }, + { "Precision 3540", 0x29 }, { "Vostro V131", 0x1d }, { "Vostro 5568", 0x29 }, + { "XPS 15 7590", 0x29 }, }; static void register_dell_lis3lv02d_i2c_device(struct i801_priv *priv) diff --git a/drivers/i2c/busses/i2c-ibm_iic.c b/drivers/i2c/busses/i2c-ibm_iic.c index 408820319ec48e6aa9258aab21996ec0e09c8656..7fb87b78923e460222fe633e0229aa18df7e608d 100644 --- a/drivers/i2c/busses/i2c-ibm_iic.c +++ b/drivers/i2c/busses/i2c-ibm_iic.c @@ -739,7 +739,7 @@ static int iic_probe(struct platform_device *ofdev) adap->dev.of_node = of_node_get(np); strscpy(adap->name, "IBM IIC", sizeof(adap->name)); i2c_set_adapdata(adap, dev); - adap->class = I2C_CLASS_HWMON | I2C_CLASS_SPD; + adap->class = I2C_CLASS_HWMON; adap->algo = &iic_algo; adap->timeout = HZ; diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index 1775a79aeba2afa64b1ad3e0e22ac823deaeb48e..88a053987403cc6f59c3def73fd52cd11e2b1359 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -1401,7 +1401,7 @@ static int i2c_imx_init_recovery_info(struct imx_i2c_struct *i2c_imx, PINCTRL_STATE_DEFAULT); i2c_imx->pinctrl_pins_gpio = pinctrl_lookup_state(i2c_imx->pinctrl, "gpio"); - rinfo->sda_gpiod = devm_gpiod_get(&pdev->dev, "sda", GPIOD_IN); + rinfo->sda_gpiod = devm_gpiod_get_optional(&pdev->dev, "sda", GPIOD_IN); rinfo->scl_gpiod = devm_gpiod_get(&pdev->dev, "scl", GPIOD_OUT_HIGH_OPEN_DRAIN); if (PTR_ERR(rinfo->sda_gpiod) == -EPROBE_DEFER || diff --git a/drivers/i2c/busses/i2c-iop3xx.c b/drivers/i2c/busses/i2c-iop3xx.c index f2f7ebeeaecb0345edb1e8c9cc7aa222d1970d88..2e5f0165c3d3016ee613517838b223af10480a35 100644 --- a/drivers/i2c/busses/i2c-iop3xx.c +++ b/drivers/i2c/busses/i2c-iop3xx.c @@ -478,7 +478,7 @@ iop3xx_i2c_probe(struct platform_device *pdev) memcpy(new_adapter->name, pdev->name, strlen(pdev->name)); new_adapter->owner = THIS_MODULE; - new_adapter->class = I2C_CLASS_HWMON | I2C_CLASS_SPD; + new_adapter->class = I2C_CLASS_HWMON; 
new_adapter->dev.parent = &pdev->dev; new_adapter->dev.of_node = pdev->dev.of_node; new_adapter->nr = pdev->id; diff --git a/drivers/i2c/busses/i2c-isch.c b/drivers/i2c/busses/i2c-isch.c index 1dc1ceaa44439f67566008606f6b543e8f386509..416a9968ed2870a26f0a2b735f33e82bba85aa5a 100644 --- a/drivers/i2c/busses/i2c-isch.c +++ b/drivers/i2c/busses/i2c-isch.c @@ -249,7 +249,7 @@ static const struct i2c_algorithm smbus_algorithm = { static struct i2c_adapter sch_adapter = { .owner = THIS_MODULE, - .class = I2C_CLASS_HWMON | I2C_CLASS_SPD, + .class = I2C_CLASS_HWMON, .algo = &smbus_algorithm, }; diff --git a/drivers/i2c/busses/i2c-kempld.c b/drivers/i2c/busses/i2c-kempld.c index e01d7530828840950273a3e332bb99ced6bf2e6b..c3a529a73b5bc119a9b829d9ea1c31dda0e41c46 100644 --- a/drivers/i2c/busses/i2c-kempld.c +++ b/drivers/i2c/busses/i2c-kempld.c @@ -283,8 +283,7 @@ static const struct i2c_algorithm kempld_i2c_algorithm = { static const struct i2c_adapter kempld_i2c_adapter = { .owner = THIS_MODULE, .name = "i2c-kempld", - .class = I2C_CLASS_HWMON | I2C_CLASS_SPD | - I2C_CLASS_DEPRECATED, + .class = I2C_CLASS_HWMON | I2C_CLASS_DEPRECATED, .algo = &kempld_i2c_algorithm, }; diff --git a/drivers/i2c/busses/i2c-mlxcpld.c b/drivers/i2c/busses/i2c-mlxcpld.c index 6fec64ea67fbc11bffcf2c0a8c1604ea804bca33..099291a0411dec07664bb98fbabf1b4e4e19adf4 100644 --- a/drivers/i2c/busses/i2c-mlxcpld.c +++ b/drivers/i2c/busses/i2c-mlxcpld.c @@ -477,7 +477,7 @@ static const struct i2c_adapter_quirks mlxcpld_i2c_quirks_ext2 = { static struct i2c_adapter mlxcpld_i2c_adapter = { .owner = THIS_MODULE, .name = "i2c-mlxcpld", - .class = I2C_CLASS_HWMON | I2C_CLASS_SPD, + .class = I2C_CLASS_HWMON, .algo = &mlxcpld_i2c_algo, .quirks = &mlxcpld_i2c_quirks, .retries = MLXCPLD_I2C_RETR_NUM, diff --git a/drivers/i2c/busses/i2c-nforce2.c b/drivers/i2c/busses/i2c-nforce2.c index 38d203d93eeec4bde521ddb034a1e4bd1fc88da0..fab662e6bc084fd1ca573685f60d6c4406b6b0e4 100644 --- a/drivers/i2c/busses/i2c-nforce2.c +++ 
b/drivers/i2c/busses/i2c-nforce2.c @@ -349,7 +349,7 @@ static int nforce2_probe_smb(struct pci_dev *dev, int bar, int alt_reg, return -EBUSY; } smbus->adapter.owner = THIS_MODULE; - smbus->adapter.class = I2C_CLASS_HWMON | I2C_CLASS_SPD; + smbus->adapter.class = I2C_CLASS_HWMON; smbus->adapter.algo = &smbus_algorithm; smbus->adapter.algo_data = smbus; smbus->adapter.dev.parent = &dev->dev; diff --git a/drivers/i2c/busses/i2c-npcm7xx.c b/drivers/i2c/busses/i2c-npcm7xx.c index ae4bae63ad4f3c3086635928acdd9b46deb10c28..54181b3f1919625a83010efbb7b439f8fd2316b7 100644 --- a/drivers/i2c/busses/i2c-npcm7xx.c +++ b/drivers/i2c/busses/i2c-npcm7xx.c @@ -326,7 +326,6 @@ struct npcm_i2c { u8 slv_rd_buf[MAX_I2C_HW_FIFO_SIZE]; u8 slv_wr_buf[MAX_I2C_HW_FIFO_SIZE]; #endif - struct dentry *debugfs; /* debugfs device directory */ u64 ber_cnt; u64 rec_succ_cnt; u64 rec_fail_cnt; @@ -2250,27 +2249,15 @@ static const struct i2c_algorithm npcm_i2c_algo = { #endif }; -/* i2c debugfs directory: used to keep health monitor of i2c devices */ -static struct dentry *npcm_i2c_debugfs_dir; - static void npcm_i2c_init_debugfs(struct platform_device *pdev, struct npcm_i2c *bus) { - struct dentry *d; - - if (!npcm_i2c_debugfs_dir) - return; - d = debugfs_create_dir(dev_name(&pdev->dev), npcm_i2c_debugfs_dir); - if (IS_ERR_OR_NULL(d)) - return; - debugfs_create_u64("ber_cnt", 0444, d, &bus->ber_cnt); - debugfs_create_u64("nack_cnt", 0444, d, &bus->nack_cnt); - debugfs_create_u64("rec_succ_cnt", 0444, d, &bus->rec_succ_cnt); - debugfs_create_u64("rec_fail_cnt", 0444, d, &bus->rec_fail_cnt); - debugfs_create_u64("timeout_cnt", 0444, d, &bus->timeout_cnt); - debugfs_create_u64("tx_complete_cnt", 0444, d, &bus->tx_complete_cnt); - - bus->debugfs = d; + debugfs_create_u64("ber_cnt", 0444, bus->adap.debugfs, &bus->ber_cnt); + debugfs_create_u64("nack_cnt", 0444, bus->adap.debugfs, &bus->nack_cnt); + debugfs_create_u64("rec_succ_cnt", 0444, bus->adap.debugfs, &bus->rec_succ_cnt); + 
debugfs_create_u64("rec_fail_cnt", 0444, bus->adap.debugfs, &bus->rec_fail_cnt); + debugfs_create_u64("timeout_cnt", 0444, bus->adap.debugfs, &bus->timeout_cnt); + debugfs_create_u64("tx_complete_cnt", 0444, bus->adap.debugfs, &bus->tx_complete_cnt); } static int npcm_i2c_probe_bus(struct platform_device *pdev) @@ -2362,7 +2349,6 @@ static void npcm_i2c_remove_bus(struct platform_device *pdev) unsigned long lock_flags; struct npcm_i2c *bus = platform_get_drvdata(pdev); - debugfs_remove_recursive(bus->debugfs); spin_lock_irqsave(&bus->lock, lock_flags); npcm_i2c_disable(bus); spin_unlock_irqrestore(&bus->lock, lock_flags); @@ -2385,28 +2371,7 @@ static struct platform_driver npcm_i2c_bus_driver = { } }; -static int __init npcm_i2c_init(void) -{ - int ret; - - npcm_i2c_debugfs_dir = debugfs_create_dir("npcm_i2c", NULL); - - ret = platform_driver_register(&npcm_i2c_bus_driver); - if (ret) { - debugfs_remove_recursive(npcm_i2c_debugfs_dir); - return ret; - } - - return 0; -} -module_init(npcm_i2c_init); - -static void __exit npcm_i2c_exit(void) -{ - platform_driver_unregister(&npcm_i2c_bus_driver); - debugfs_remove_recursive(npcm_i2c_debugfs_dir); -} -module_exit(npcm_i2c_exit); +module_platform_driver(npcm_i2c_bus_driver); MODULE_AUTHOR("Avi Fishman "); MODULE_AUTHOR("Tali Perry "); diff --git a/drivers/i2c/busses/i2c-pasemi-pci.c b/drivers/i2c/busses/i2c-pasemi-pci.c index cfc89e04eb94cc5e27275a2ca1a72a5379773bf5..77f90c7436eda2df16afd7f1cac79355fb005bfd 100644 --- a/drivers/i2c/busses/i2c-pasemi-pci.c +++ b/drivers/i2c/busses/i2c-pasemi-pci.c @@ -56,7 +56,7 @@ static int pasemi_smb_pci_probe(struct pci_dev *dev, if (!smbus->ioaddr) return -EBUSY; - smbus->adapter.class = I2C_CLASS_HWMON | I2C_CLASS_SPD; + smbus->adapter.class = I2C_CLASS_HWMON; error = pasemi_i2c_common_probe(smbus); if (error) return error; diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c index 
809fbd014cd6833749a677bba4b6845854459d3b..6a0392172b2f2ea643c70080d127232427d32873 100644 --- a/drivers/i2c/busses/i2c-piix4.c +++ b/drivers/i2c/busses/i2c-piix4.c @@ -943,7 +943,7 @@ static int piix4_add_adapter(struct pci_dev *dev, unsigned short smba, } adap->owner = THIS_MODULE; - adap->class = I2C_CLASS_HWMON | I2C_CLASS_SPD; + adap->class = I2C_CLASS_HWMON; adap->algo = sb800_main ? &piix4_smbus_algorithm_sb800 : &smbus_algorithm; diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c index 829ac053bbb7c4a1203a7fc878b2aebf28600d22..828aa2ea0fe4c90785cbdc5e65b5b28cdf86b40b 100644 --- a/drivers/i2c/busses/i2c-rcar.c +++ b/drivers/i2c/busses/i2c-rcar.c @@ -89,6 +89,7 @@ #define TMDMAE BIT(0) /* DMA Master Transmitted Enable */ /* ICCCR2 */ +#define FMPE BIT(7) /* Fast Mode Plus Enable */ #define CDFD BIT(2) /* CDF Disable */ #define HLSE BIT(1) /* HIGH/LOW Separate Control Enable */ #define SME BIT(0) /* SCL Mask Enable */ @@ -122,16 +123,18 @@ #define ID_NACK BIT(4) #define ID_EPROTO BIT(5) /* persistent flags */ +#define ID_P_FMPLUS BIT(27) #define ID_P_NOT_ATOMIC BIT(28) #define ID_P_HOST_NOTIFY BIT(29) #define ID_P_NO_RXDMA BIT(30) /* HW forbids RXDMA sometimes */ #define ID_P_PM_BLOCKED BIT(31) -#define ID_P_MASK GENMASK(31, 28) +#define ID_P_MASK GENMASK(31, 27) enum rcar_i2c_type { I2C_RCAR_GEN1, I2C_RCAR_GEN2, I2C_RCAR_GEN3, + I2C_RCAR_GEN4, }; struct rcar_i2c_priv { @@ -148,6 +151,7 @@ struct rcar_i2c_priv { u32 icccr; u16 schd; u16 scld; + u8 smd; u8 recovery_icmcr; /* protected by adapter lock */ enum rcar_i2c_type devtype; struct i2c_client *slave; @@ -239,9 +243,14 @@ static void rcar_i2c_init(struct rcar_i2c_priv *priv) if (priv->devtype < I2C_RCAR_GEN3) { rcar_i2c_write(priv, ICCCR, priv->icccr); } else { - rcar_i2c_write(priv, ICCCR2, CDFD | HLSE | SME); + u32 icccr2 = CDFD | HLSE | SME; + + if (priv->flags & ID_P_FMPLUS) + icccr2 |= FMPE; + + rcar_i2c_write(priv, ICCCR2, icccr2); rcar_i2c_write(priv, ICCCR, priv->icccr); - 
rcar_i2c_write(priv, ICMPR, RCAR_DEFAULT_SMD); + rcar_i2c_write(priv, ICMPR, priv->smd); rcar_i2c_write(priv, ICHPR, priv->schd); rcar_i2c_write(priv, ICLPR, priv->scld); rcar_i2c_write(priv, ICFBSCR, TCYC17); @@ -278,6 +287,7 @@ static int rcar_i2c_clock_calculate(struct rcar_i2c_priv *priv) /* Fall back to previously used values if not supplied */ i2c_parse_fw_timings(dev, &t, false); + priv->smd = RCAR_DEFAULT_SMD; /* * calculate SCL clock @@ -303,6 +313,11 @@ static int rcar_i2c_clock_calculate(struct rcar_i2c_priv *priv) if (cdf >= 1U << cdf_width) goto err_no_val; + if (t.bus_freq_hz > I2C_MAX_FAST_MODE_FREQ && priv->devtype >= I2C_RCAR_GEN4) + priv->flags |= ID_P_FMPLUS; + else + priv->flags &= ~ID_P_FMPLUS; + /* On Gen3+, we use cdf only for the filters, not as a SCL divider */ ick = rate / (priv->devtype < I2C_RCAR_GEN3 ? (cdf + 1) : 1); @@ -344,30 +359,30 @@ static int rcar_i2c_clock_calculate(struct rcar_i2c_priv *priv) * x as a base value for the SCLD/SCHD ratio: * * SCL = clkp / (8 + 2 * SMD + SCLD + SCHD + F[(ticf + tr + intd) * clkp]) - * SCL = clkp / (8 + 2 * RCAR_DEFAULT_SMD + RCAR_SCLD_RATIO * x + * SCL = clkp / (8 + 2 * SMD + RCAR_SCLD_RATIO * x * + RCAR_SCHD_RATIO * x + F[...]) * * with: sum_ratio = RCAR_SCLD_RATIO + RCAR_SCHD_RATIO - * and: smd = RCAR_DEFAULT_SMD * * SCL = clkp / (8 + 2 * smd + sum_ratio * x + F[...]) * 8 + 2 * smd + sum_ratio * x + F[...] 
= clkp / SCL * x = ((clkp / SCL) - 8 - 2 * smd - F[...]) / sum_ratio */ x = DIV_ROUND_UP(rate, t.bus_freq_hz ?: 1); - x = DIV_ROUND_UP(x - 8 - 2 * RCAR_DEFAULT_SMD - round, sum_ratio); - scl = rate / (8 + 2 * RCAR_DEFAULT_SMD + sum_ratio * x + round); + x = DIV_ROUND_UP(x - 8 - 2 * priv->smd - round, sum_ratio); + scl = rate / (8 + 2 * priv->smd + sum_ratio * x + round); - /* Bail out if values don't fit into 16 bit or SMD became too large */ - if (x * RCAR_SCLD_RATIO > 0xffff || RCAR_DEFAULT_SMD > x * RCAR_SCHD_RATIO) + if (x == 0 || x * RCAR_SCLD_RATIO > 0xffff) goto err_no_val; priv->icccr = cdf; priv->schd = RCAR_SCHD_RATIO * x; priv->scld = RCAR_SCLD_RATIO * x; + if (priv->smd >= priv->schd) + priv->smd = priv->schd - 1; - dev_dbg(dev, "clk %u/%u(%lu), round %u, CDF: %u SCHD %u SCLD %u\n", - scl, t.bus_freq_hz, rate, round, cdf, priv->schd, priv->scld); + dev_dbg(dev, "clk %u/%u(%lu), round %u, CDF: %u SCHD %u SCLD %u SMD %u\n", + scl, t.bus_freq_hz, rate, round, cdf, priv->schd, priv->scld, priv->smd); } return 0; @@ -431,8 +446,8 @@ static void rcar_i2c_cleanup_dma(struct rcar_i2c_priv *priv, bool terminate) dma_unmap_single(chan->device->dev, sg_dma_address(&priv->sg), sg_dma_len(&priv->sg), priv->dma_direction); - /* Gen3 can only do one RXDMA per transfer and we just completed it */ - if (priv->devtype == I2C_RCAR_GEN3 && + /* Gen3+ can only do one RXDMA per transfer and we just completed it */ + if (priv->devtype >= I2C_RCAR_GEN3 && priv->dma_direction == DMA_FROM_DEVICE) priv->flags |= ID_P_NO_RXDMA; @@ -886,8 +901,8 @@ static int rcar_i2c_master_xfer(struct i2c_adapter *adap, if (ret < 0) goto out; - /* Gen3 needs a reset before allowing RXDMA once */ - if (priv->devtype == I2C_RCAR_GEN3) { + /* Gen3+ needs a reset. 
That also allows RXDMA once */ + if (priv->devtype >= I2C_RCAR_GEN3) { priv->flags &= ~ID_P_NO_RXDMA; ret = rcar_i2c_do_reset(priv); if (ret) @@ -1072,10 +1087,12 @@ static const struct of_device_id rcar_i2c_dt_ids[] = { { .compatible = "renesas,i2c-r8a7794", .data = (void *)I2C_RCAR_GEN2 }, { .compatible = "renesas,i2c-r8a7795", .data = (void *)I2C_RCAR_GEN3 }, { .compatible = "renesas,i2c-r8a7796", .data = (void *)I2C_RCAR_GEN3 }, + /* S4 has no FM+ bit */ + { .compatible = "renesas,i2c-r8a779f0", .data = (void *)I2C_RCAR_GEN3 }, { .compatible = "renesas,rcar-gen1-i2c", .data = (void *)I2C_RCAR_GEN1 }, { .compatible = "renesas,rcar-gen2-i2c", .data = (void *)I2C_RCAR_GEN2 }, { .compatible = "renesas,rcar-gen3-i2c", .data = (void *)I2C_RCAR_GEN3 }, - { .compatible = "renesas,rcar-gen4-i2c", .data = (void *)I2C_RCAR_GEN3 }, + { .compatible = "renesas,rcar-gen4-i2c", .data = (void *)I2C_RCAR_GEN4 }, {}, }; MODULE_DEVICE_TABLE(of, rcar_i2c_dt_ids); @@ -1151,7 +1168,7 @@ static int rcar_i2c_probe(struct platform_device *pdev) if (of_property_read_bool(dev->of_node, "smbus")) priv->flags |= ID_P_HOST_NOTIFY; - if (priv->devtype == I2C_RCAR_GEN3) { + if (priv->devtype >= I2C_RCAR_GEN3) { priv->rstc = devm_reset_control_get_exclusive(&pdev->dev, NULL); if (IS_ERR(priv->rstc)) { ret = PTR_ERR(priv->rstc); diff --git a/drivers/i2c/busses/i2c-rk3x.c b/drivers/i2c/busses/i2c-rk3x.c index 4362db7c57892c83d53f3c0062630d7b7b7d9990..086fdf262e7b60e26c48727f6f8f586615674117 100644 --- a/drivers/i2c/busses/i2c-rk3x.c +++ b/drivers/i2c/busses/i2c-rk3x.c @@ -1295,8 +1295,12 @@ static int rk3x_i2c_probe(struct platform_device *pdev) return -EINVAL; } - /* 27+i: write mask, 11+i: value */ - value = BIT(27 + bus_nr) | BIT(11 + bus_nr); + /* rv1126 i2c2 uses non-sequential write mask 20, value 4 */ + if (i2c->soc_data == &rv1126_soc_data && bus_nr == 2) + value = BIT(20) | BIT(4); + else + /* 27+i: write mask, 11+i: value */ + value = BIT(27 + bus_nr) | BIT(11 + bus_nr); ret = 
regmap_write(grf, i2c->soc_data->grf_offset, value); if (ret != 0) { diff --git a/drivers/i2c/busses/i2c-s3c2410.c b/drivers/i2c/busses/i2c-s3c2410.c index c56886af724ea87e3f863cd6140951530e620d76..275f7c42165cde7881bad16433c9150f653af91f 100644 --- a/drivers/i2c/busses/i2c-s3c2410.c +++ b/drivers/i2c/busses/i2c-s3c2410.c @@ -76,6 +76,7 @@ #define QUIRK_HDMIPHY (1 << 1) #define QUIRK_NO_GPIO (1 << 2) #define QUIRK_POLL (1 << 3) +#define QUIRK_ATOMIC (1 << 4) /* Max time to wait for bus to become idle after a xfer (in us) */ #define S3C2410_IDLE_TIMEOUT 5000 @@ -174,7 +175,7 @@ static inline void s3c24xx_i2c_master_complete(struct s3c24xx_i2c *i2c, int ret) if (ret) i2c->msg_idx = ret; - if (!(i2c->quirks & QUIRK_POLL)) + if (!(i2c->quirks & (QUIRK_POLL | QUIRK_ATOMIC))) wake_up(&i2c->wait); } @@ -216,8 +217,17 @@ static bool is_ack(struct s3c24xx_i2c *i2c) int tries; for (tries = 50; tries; --tries) { - if (readl(i2c->regs + S3C2410_IICCON) - & S3C2410_IICCON_IRQPEND) { + unsigned long tmp = readl(i2c->regs + S3C2410_IICCON); + + if (!(tmp & S3C2410_IICCON_ACKEN)) { + /* + * Wait a bit for the bus to stabilize, + * delay estimated experimentally. 
+ */ + usleep_range(100, 200); + return true; + } + if (tmp & S3C2410_IICCON_IRQPEND) { if (!(readl(i2c->regs + S3C2410_IICSTAT) & S3C2410_IICSTAT_LASTBIT)) return true; @@ -270,16 +280,6 @@ static void s3c24xx_i2c_message_start(struct s3c24xx_i2c *i2c, stat |= S3C2410_IICSTAT_START; writel(stat, i2c->regs + S3C2410_IICSTAT); - - if (i2c->quirks & QUIRK_POLL) { - while ((i2c->msg_num != 0) && is_ack(i2c)) { - i2c_s3c_irq_nextbyte(i2c, stat); - stat = readl(i2c->regs + S3C2410_IICSTAT); - - if (stat & S3C2410_IICSTAT_ARBITR) - dev_err(i2c->dev, "deal with arbitration loss\n"); - } - } } static inline void s3c24xx_i2c_stop(struct s3c24xx_i2c *i2c, int ret) @@ -685,7 +685,7 @@ static void s3c24xx_i2c_wait_idle(struct s3c24xx_i2c *i2c) static int s3c24xx_i2c_doxfer(struct s3c24xx_i2c *i2c, struct i2c_msg *msgs, int num) { - unsigned long timeout; + unsigned long timeout = 0; int ret; ret = s3c24xx_i2c_set_master(i2c); @@ -704,17 +704,20 @@ static int s3c24xx_i2c_doxfer(struct s3c24xx_i2c *i2c, s3c24xx_i2c_enable_irq(i2c); s3c24xx_i2c_message_start(i2c, msgs); - if (i2c->quirks & QUIRK_POLL) { - ret = i2c->msg_idx; + if (i2c->quirks & (QUIRK_POLL | QUIRK_ATOMIC)) { + while ((i2c->msg_num != 0) && is_ack(i2c)) { + unsigned long stat = readl(i2c->regs + S3C2410_IICSTAT); - if (ret != num) - dev_dbg(i2c->dev, "incomplete xfer (%d)\n", ret); + i2c_s3c_irq_nextbyte(i2c, stat); - goto out; + stat = readl(i2c->regs + S3C2410_IICSTAT); + if (stat & S3C2410_IICSTAT_ARBITR) + dev_err(i2c->dev, "deal with arbitration loss\n"); + } + } else { + timeout = wait_event_timeout(i2c->wait, i2c->msg_num == 0, HZ * 5); } - timeout = wait_event_timeout(i2c->wait, i2c->msg_num == 0, HZ * 5); - ret = i2c->msg_idx; /* @@ -773,6 +776,21 @@ static int s3c24xx_i2c_xfer(struct i2c_adapter *adap, return -EREMOTEIO; } +static int s3c24xx_i2c_xfer_atomic(struct i2c_adapter *adap, + struct i2c_msg *msgs, int num) +{ + struct s3c24xx_i2c *i2c = (struct s3c24xx_i2c *)adap->algo_data; + int ret; + + 
disable_irq(i2c->irq); + i2c->quirks |= QUIRK_ATOMIC; + ret = s3c24xx_i2c_xfer(adap, msgs, num); + i2c->quirks &= ~QUIRK_ATOMIC; + enable_irq(i2c->irq); + + return ret; +} + /* declare our i2c functionality */ static u32 s3c24xx_i2c_func(struct i2c_adapter *adap) { @@ -783,6 +801,7 @@ static u32 s3c24xx_i2c_func(struct i2c_adapter *adap) /* i2c bus registration info */ static const struct i2c_algorithm s3c24xx_i2c_algorithm = { .master_xfer = s3c24xx_i2c_xfer, + .master_xfer_atomic = s3c24xx_i2c_xfer_atomic, .functionality = s3c24xx_i2c_func, }; diff --git a/drivers/i2c/busses/i2c-scmi.c b/drivers/i2c/busses/i2c-scmi.c index 421735acfa141f2394aaca68b0b872f3accd09b1..d7af8e0d7599ec3abe8275c926a4a4ae2b90d84b 100644 --- a/drivers/i2c/busses/i2c-scmi.c +++ b/drivers/i2c/busses/i2c-scmi.c @@ -385,7 +385,7 @@ static int smbus_cmi_probe(struct platform_device *device) smbus_cmi->adapter.owner = THIS_MODULE; smbus_cmi->adapter.algo = &acpi_smbus_cmi_algorithm; smbus_cmi->adapter.algo_data = smbus_cmi; - smbus_cmi->adapter.class = I2C_CLASS_HWMON | I2C_CLASS_SPD; + smbus_cmi->adapter.class = I2C_CLASS_HWMON; smbus_cmi->adapter.dev.parent = &device->dev; ret = i2c_add_adapter(&smbus_cmi->adapter); diff --git a/drivers/i2c/busses/i2c-sh7760.c b/drivers/i2c/busses/i2c-sh7760.c index 1ad2a26156d17703022548e6b432014f7fb64df9..8a043f5fca1e067125be040ee13a587eb3aee7c9 100644 --- a/drivers/i2c/busses/i2c-sh7760.c +++ b/drivers/i2c/busses/i2c-sh7760.c @@ -477,7 +477,7 @@ static int sh7760_i2c_probe(struct platform_device *pdev) id->adap.nr = pdev->id; id->adap.algo = &sh7760_i2c_algo; - id->adap.class = I2C_CLASS_HWMON | I2C_CLASS_SPD; + id->adap.class = I2C_CLASS_HWMON; id->adap.retries = 3; id->adap.algo_data = id; id->adap.dev.parent = &pdev->dev; diff --git a/drivers/i2c/busses/i2c-sibyte.c b/drivers/i2c/busses/i2c-sibyte.c index 8f71f01cb169b04fded2bf59e46a12109e5d11ac..49f8f4f1b0f0fc8eedbde2e2d57756bea432da3f 100644 --- a/drivers/i2c/busses/i2c-sibyte.c +++ 
b/drivers/i2c/busses/i2c-sibyte.c @@ -142,7 +142,7 @@ static struct i2c_algo_sibyte_data sibyte_board_data[2] = { static struct i2c_adapter sibyte_board_adapter[2] = { { .owner = THIS_MODULE, - .class = I2C_CLASS_HWMON | I2C_CLASS_SPD, + .class = I2C_CLASS_HWMON, .algo = NULL, .algo_data = &sibyte_board_data[0], .nr = 0, @@ -150,7 +150,7 @@ static struct i2c_adapter sibyte_board_adapter[2] = { }, { .owner = THIS_MODULE, - .class = I2C_CLASS_HWMON | I2C_CLASS_SPD, + .class = I2C_CLASS_HWMON, .algo = NULL, .algo_data = &sibyte_board_data[1], .nr = 1, diff --git a/drivers/i2c/busses/i2c-sis5595.c b/drivers/i2c/busses/i2c-sis5595.c index 486f1e9dfb74aabff9b29ae39f9b37d16b0444a9..32476dc10ad61f0e13c8f316e41455de5020f968 100644 --- a/drivers/i2c/busses/i2c-sis5595.c +++ b/drivers/i2c/busses/i2c-sis5595.c @@ -353,7 +353,7 @@ static const struct i2c_algorithm smbus_algorithm = { static struct i2c_adapter sis5595_adapter = { .owner = THIS_MODULE, - .class = I2C_CLASS_HWMON | I2C_CLASS_SPD, + .class = I2C_CLASS_HWMON, .algo = &smbus_algorithm, }; diff --git a/drivers/i2c/busses/i2c-sis630.c b/drivers/i2c/busses/i2c-sis630.c index 87d56250d78a3e92f897b9fdd93950a1db320c28..3505cf29cedda32f0498fcf652756af907a53a50 100644 --- a/drivers/i2c/busses/i2c-sis630.c +++ b/drivers/i2c/busses/i2c-sis630.c @@ -493,7 +493,7 @@ static const struct i2c_algorithm smbus_algorithm = { static struct i2c_adapter sis630_adapter = { .owner = THIS_MODULE, - .class = I2C_CLASS_HWMON | I2C_CLASS_SPD, + .class = I2C_CLASS_HWMON, .algo = &smbus_algorithm, .retries = 3 }; diff --git a/drivers/i2c/busses/i2c-sis96x.c b/drivers/i2c/busses/i2c-sis96x.c index cde8003985a58a4d1ee52d165424e95a8ad36fa1..77529dda6fcde6acb29f3916d6564378f89bdc4c 100644 --- a/drivers/i2c/busses/i2c-sis96x.c +++ b/drivers/i2c/busses/i2c-sis96x.c @@ -228,7 +228,7 @@ static const struct i2c_algorithm smbus_algorithm = { static struct i2c_adapter sis96x_adapter = { .owner = THIS_MODULE, - .class = I2C_CLASS_HWMON | I2C_CLASS_SPD, + 
.class = I2C_CLASS_HWMON, .algo = &smbus_algorithm, }; diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c index 983509936727edfdd8f28320076f2f2ef068263b..01210452216b333abd64e06451524784a537bca8 100644 --- a/drivers/i2c/busses/i2c-stm32f7.c +++ b/drivers/i2c/busses/i2c-stm32f7.c @@ -50,6 +50,7 @@ #define STM32F7_I2C_TXDR 0x28 /* STM32F7 I2C control 1 */ +#define STM32_I2C_CR1_FMP BIT(24) #define STM32F7_I2C_CR1_PECEN BIT(23) #define STM32F7_I2C_CR1_ALERTEN BIT(22) #define STM32F7_I2C_CR1_SMBHEN BIT(20) @@ -226,6 +227,8 @@ struct stm32f7_i2c_spec { * @rise_time: Rise time (ns) * @fall_time: Fall time (ns) * @fmp_clr_offset: Fast Mode Plus clear register offset from set register + * @single_it_line: Only a single IT line is used for both events/errors + * @fmp_cr1_bit: Fast Mode Plus control is done via a bit in CR1 */ struct stm32f7_i2c_setup { u32 speed_freq; @@ -233,6 +236,8 @@ struct stm32f7_i2c_setup { u32 rise_time; u32 fall_time; u32 fmp_clr_offset; + bool single_it_line; + bool fmp_cr1_bit; }; /** @@ -418,6 +423,13 @@ static const struct stm32f7_i2c_setup stm32mp13_setup = { .fmp_clr_offset = 0x4, }; +static const struct stm32f7_i2c_setup stm32mp25_setup = { + .rise_time = STM32F7_I2C_RISE_TIME_DEFAULT, + .fall_time = STM32F7_I2C_FALL_TIME_DEFAULT, + .single_it_line = true, + .fmp_cr1_bit = true, +}; + static inline void stm32f7_i2c_set_bits(void __iomem *reg, u32 mask) { writel_relaxed(readl_relaxed(reg) | mask, reg); @@ -1419,15 +1431,13 @@ static bool stm32f7_i2c_is_slave_busy(struct stm32f7_i2c_dev *i2c_dev) return i == busy; } -static irqreturn_t stm32f7_i2c_slave_isr_event(struct stm32f7_i2c_dev *i2c_dev) +static irqreturn_t stm32f7_i2c_slave_isr_event(struct stm32f7_i2c_dev *i2c_dev, u32 status) { void __iomem *base = i2c_dev->base; - u32 cr2, status, mask; + u32 cr2, mask; u8 val; int ret; - status = readl_relaxed(i2c_dev->base + STM32F7_I2C_ISR); - /* Slave transmitter mode */ if (status & STM32F7_I2C_ISR_TXIS) { 
i2c_slave_event(i2c_dev->slave_running, @@ -1494,23 +1504,81 @@ static irqreturn_t stm32f7_i2c_slave_isr_event(struct stm32f7_i2c_dev *i2c_dev) return IRQ_HANDLED; } -static irqreturn_t stm32f7_i2c_isr_event(int irq, void *data) +static irqreturn_t stm32f7_i2c_handle_isr_errs(struct stm32f7_i2c_dev *i2c_dev, u32 status) { - struct stm32f7_i2c_dev *i2c_dev = data; struct stm32f7_i2c_msg *f7_msg = &i2c_dev->f7_msg; - struct stm32_i2c_dma *dma = i2c_dev->dma; + u16 addr = f7_msg->addr; void __iomem *base = i2c_dev->base; - u32 status, mask; - int ret = IRQ_HANDLED; + struct device *dev = i2c_dev->dev; + struct stm32_i2c_dma *dma = i2c_dev->dma; - /* Check if the interrupt if for a slave device */ - if (!i2c_dev->master_mode) { - ret = stm32f7_i2c_slave_isr_event(i2c_dev); - return ret; + /* Bus error */ + if (status & STM32F7_I2C_ISR_BERR) { + dev_err(dev, "Bus error accessing addr 0x%x\n", addr); + writel_relaxed(STM32F7_I2C_ICR_BERRCF, base + STM32F7_I2C_ICR); + stm32f7_i2c_release_bus(&i2c_dev->adap); + f7_msg->result = -EIO; + } + + /* Arbitration loss */ + if (status & STM32F7_I2C_ISR_ARLO) { + dev_dbg(dev, "Arbitration loss accessing addr 0x%x\n", addr); + writel_relaxed(STM32F7_I2C_ICR_ARLOCF, base + STM32F7_I2C_ICR); + f7_msg->result = -EAGAIN; + } + + if (status & STM32F7_I2C_ISR_PECERR) { + dev_err(dev, "PEC error in reception accessing addr 0x%x\n", addr); + writel_relaxed(STM32F7_I2C_ICR_PECCF, base + STM32F7_I2C_ICR); + f7_msg->result = -EINVAL; } + if (status & STM32F7_I2C_ISR_ALERT) { + dev_dbg(dev, "SMBus alert received\n"); + writel_relaxed(STM32F7_I2C_ICR_ALERTCF, base + STM32F7_I2C_ICR); + i2c_handle_smbus_alert(i2c_dev->alert->ara); + return IRQ_HANDLED; + } + + if (!i2c_dev->slave_running) { + u32 mask; + /* Disable interrupts */ + if (stm32f7_i2c_is_slave_registered(i2c_dev)) + mask = STM32F7_I2C_XFER_IRQ_MASK; + else + mask = STM32F7_I2C_ALL_IRQ_MASK; + stm32f7_i2c_disable_irq(i2c_dev, mask); + } + + /* Disable dma */ + if (i2c_dev->use_dma) { + 
stm32f7_i2c_disable_dma_req(i2c_dev); + dmaengine_terminate_async(dma->chan_using); + } + + i2c_dev->master_mode = false; + complete(&i2c_dev->complete); + + return IRQ_HANDLED; +} + +#define STM32F7_ERR_EVENTS (STM32F7_I2C_ISR_BERR | STM32F7_I2C_ISR_ARLO |\ + STM32F7_I2C_ISR_PECERR | STM32F7_I2C_ISR_ALERT) +static irqreturn_t stm32f7_i2c_isr_event(int irq, void *data) +{ + struct stm32f7_i2c_dev *i2c_dev = data; + u32 status; + status = readl_relaxed(i2c_dev->base + STM32F7_I2C_ISR); + /* + * Check if the interrupt is for a slave device or related + * to errors flags (in case of single it line mode) + */ + if (!i2c_dev->master_mode || + (i2c_dev->setup.single_it_line && (status & STM32F7_ERR_EVENTS))) + return IRQ_WAKE_THREAD; + /* Tx empty */ if (status & STM32F7_I2C_ISR_TXIS) stm32f7_i2c_write_tx_data(i2c_dev); @@ -1519,6 +1587,33 @@ static irqreturn_t stm32f7_i2c_isr_event(int irq, void *data) if (status & STM32F7_I2C_ISR_RXNE) stm32f7_i2c_read_rx_data(i2c_dev); + /* Wake up the thread if other flags are raised */ + if (status & + (STM32F7_I2C_ISR_NACKF | STM32F7_I2C_ISR_STOPF | + STM32F7_I2C_ISR_TC | STM32F7_I2C_ISR_TCR)) + return IRQ_WAKE_THREAD; + + return IRQ_HANDLED; +} + +static irqreturn_t stm32f7_i2c_isr_event_thread(int irq, void *data) +{ + struct stm32f7_i2c_dev *i2c_dev = data; + struct stm32f7_i2c_msg *f7_msg = &i2c_dev->f7_msg; + struct stm32_i2c_dma *dma = i2c_dev->dma; + void __iomem *base = i2c_dev->base; + u32 status, mask; + int ret; + + status = readl_relaxed(i2c_dev->base + STM32F7_I2C_ISR); + + if (!i2c_dev->master_mode) + return stm32f7_i2c_slave_isr_event(i2c_dev, status); + + /* Handle errors in case of this handler is used for events/errors */ + if (i2c_dev->setup.single_it_line && (status & STM32F7_ERR_EVENTS)) + return stm32f7_i2c_handle_isr_errs(i2c_dev, status); + /* NACK received */ if (status & STM32F7_I2C_ISR_NACKF) { dev_dbg(i2c_dev->dev, "<%s>: Receive NACK (addr %x)\n", @@ -1531,33 +1626,28 @@ static irqreturn_t 
stm32f7_i2c_isr_event(int irq, void *data) f7_msg->result = -ENXIO; } - /* STOP detection flag */ - if (status & STM32F7_I2C_ISR_STOPF) { - /* Disable interrupts */ - if (stm32f7_i2c_is_slave_registered(i2c_dev)) - mask = STM32F7_I2C_XFER_IRQ_MASK; + if (status & STM32F7_I2C_ISR_TCR) { + if (f7_msg->smbus) + stm32f7_i2c_smbus_reload(i2c_dev); else - mask = STM32F7_I2C_ALL_IRQ_MASK; - stm32f7_i2c_disable_irq(i2c_dev, mask); - - /* Clear STOP flag */ - writel_relaxed(STM32F7_I2C_ICR_STOPCF, base + STM32F7_I2C_ICR); - - if (i2c_dev->use_dma && !f7_msg->result) { - ret = IRQ_WAKE_THREAD; - } else { - i2c_dev->master_mode = false; - complete(&i2c_dev->complete); - } + stm32f7_i2c_reload(i2c_dev); } /* Transfer complete */ if (status & STM32F7_I2C_ISR_TC) { + /* Wait for dma transfer completion before sending next message */ + if (i2c_dev->use_dma && !f7_msg->result) { + ret = wait_for_completion_timeout(&i2c_dev->dma->dma_complete, HZ); + if (!ret) { + dev_dbg(i2c_dev->dev, "<%s>: Timed out\n", __func__); + stm32f7_i2c_disable_dma_req(i2c_dev); + dmaengine_terminate_async(dma->chan_using); + f7_msg->result = -ETIMEDOUT; + } + } if (f7_msg->stop) { mask = STM32F7_I2C_CR2_STOP; stm32f7_i2c_set_bits(base + STM32F7_I2C_CR2, mask); - } else if (i2c_dev->use_dma && !f7_msg->result) { - ret = IRQ_WAKE_THREAD; } else if (f7_msg->smbus) { stm32f7_i2c_smbus_rep_start(i2c_dev); } else { @@ -1567,47 +1657,18 @@ static irqreturn_t stm32f7_i2c_isr_event(int irq, void *data) } } - if (status & STM32F7_I2C_ISR_TCR) { - if (f7_msg->smbus) - stm32f7_i2c_smbus_reload(i2c_dev); + /* STOP detection flag */ + if (status & STM32F7_I2C_ISR_STOPF) { + /* Disable interrupts */ + if (stm32f7_i2c_is_slave_registered(i2c_dev)) + mask = STM32F7_I2C_XFER_IRQ_MASK; else - stm32f7_i2c_reload(i2c_dev); - } - - return ret; -} - -static irqreturn_t stm32f7_i2c_isr_event_thread(int irq, void *data) -{ - struct stm32f7_i2c_dev *i2c_dev = data; - struct stm32f7_i2c_msg *f7_msg = &i2c_dev->f7_msg; - struct 
stm32_i2c_dma *dma = i2c_dev->dma; - u32 status; - int ret; - - /* - * Wait for dma transfer completion before sending next message or - * notity the end of xfer to the client - */ - ret = wait_for_completion_timeout(&i2c_dev->dma->dma_complete, HZ); - if (!ret) { - dev_dbg(i2c_dev->dev, "<%s>: Timed out\n", __func__); - stm32f7_i2c_disable_dma_req(i2c_dev); - dmaengine_terminate_async(dma->chan_using); - f7_msg->result = -ETIMEDOUT; - } + mask = STM32F7_I2C_ALL_IRQ_MASK; + stm32f7_i2c_disable_irq(i2c_dev, mask); - status = readl_relaxed(i2c_dev->base + STM32F7_I2C_ISR); + /* Clear STOP flag */ + writel_relaxed(STM32F7_I2C_ICR_STOPCF, base + STM32F7_I2C_ICR); - if (status & STM32F7_I2C_ISR_TC) { - if (f7_msg->smbus) { - stm32f7_i2c_smbus_rep_start(i2c_dev); - } else { - i2c_dev->msg_id++; - i2c_dev->msg++; - stm32f7_i2c_xfer_msg(i2c_dev, i2c_dev->msg); - } - } else { i2c_dev->master_mode = false; complete(&i2c_dev->complete); } @@ -1615,68 +1676,14 @@ static irqreturn_t stm32f7_i2c_isr_event_thread(int irq, void *data) return IRQ_HANDLED; } -static irqreturn_t stm32f7_i2c_isr_error(int irq, void *data) +static irqreturn_t stm32f7_i2c_isr_error_thread(int irq, void *data) { struct stm32f7_i2c_dev *i2c_dev = data; - struct stm32f7_i2c_msg *f7_msg = &i2c_dev->f7_msg; - void __iomem *base = i2c_dev->base; - struct device *dev = i2c_dev->dev; - struct stm32_i2c_dma *dma = i2c_dev->dma; u32 status; status = readl_relaxed(i2c_dev->base + STM32F7_I2C_ISR); - /* Bus error */ - if (status & STM32F7_I2C_ISR_BERR) { - dev_err(dev, "<%s>: Bus error accessing addr 0x%x\n", - __func__, f7_msg->addr); - writel_relaxed(STM32F7_I2C_ICR_BERRCF, base + STM32F7_I2C_ICR); - stm32f7_i2c_release_bus(&i2c_dev->adap); - f7_msg->result = -EIO; - } - - /* Arbitration loss */ - if (status & STM32F7_I2C_ISR_ARLO) { - dev_dbg(dev, "<%s>: Arbitration loss accessing addr 0x%x\n", - __func__, f7_msg->addr); - writel_relaxed(STM32F7_I2C_ICR_ARLOCF, base + STM32F7_I2C_ICR); - f7_msg->result = 
-EAGAIN; - } - - if (status & STM32F7_I2C_ISR_PECERR) { - dev_err(dev, "<%s>: PEC error in reception accessing addr 0x%x\n", - __func__, f7_msg->addr); - writel_relaxed(STM32F7_I2C_ICR_PECCF, base + STM32F7_I2C_ICR); - f7_msg->result = -EINVAL; - } - - if (status & STM32F7_I2C_ISR_ALERT) { - dev_dbg(dev, "<%s>: SMBus alert received\n", __func__); - writel_relaxed(STM32F7_I2C_ICR_ALERTCF, base + STM32F7_I2C_ICR); - i2c_handle_smbus_alert(i2c_dev->alert->ara); - return IRQ_HANDLED; - } - - if (!i2c_dev->slave_running) { - u32 mask; - /* Disable interrupts */ - if (stm32f7_i2c_is_slave_registered(i2c_dev)) - mask = STM32F7_I2C_XFER_IRQ_MASK; - else - mask = STM32F7_I2C_ALL_IRQ_MASK; - stm32f7_i2c_disable_irq(i2c_dev, mask); - } - - /* Disable dma */ - if (i2c_dev->use_dma) { - stm32f7_i2c_disable_dma_req(i2c_dev); - dmaengine_terminate_async(dma->chan_using); - } - - i2c_dev->master_mode = false; - complete(&i2c_dev->complete); - - return IRQ_HANDLED; + return stm32f7_i2c_handle_isr_errs(i2c_dev, status); } static int stm32f7_i2c_wait_polling(struct stm32f7_i2c_dev *i2c_dev) @@ -2012,23 +2019,27 @@ static int stm32f7_i2c_unreg_slave(struct i2c_client *slave) static int stm32f7_i2c_write_fm_plus_bits(struct stm32f7_i2c_dev *i2c_dev, bool enable) { - int ret; + int ret = 0; if (i2c_dev->bus_rate <= I2C_MAX_FAST_MODE_FREQ || - IS_ERR_OR_NULL(i2c_dev->regmap)) + (!i2c_dev->setup.fmp_cr1_bit && IS_ERR_OR_NULL(i2c_dev->regmap))) /* Optional */ return 0; - if (i2c_dev->fmp_sreg == i2c_dev->fmp_creg) - ret = regmap_update_bits(i2c_dev->regmap, - i2c_dev->fmp_sreg, - i2c_dev->fmp_mask, - enable ? i2c_dev->fmp_mask : 0); - else - ret = regmap_write(i2c_dev->regmap, - enable ? 
i2c_dev->fmp_sreg : - i2c_dev->fmp_creg, - i2c_dev->fmp_mask); + if (i2c_dev->setup.fmp_cr1_bit) { + if (enable) + stm32f7_i2c_set_bits(i2c_dev->base + STM32F7_I2C_CR1, STM32_I2C_CR1_FMP); + else + stm32f7_i2c_clr_bits(i2c_dev->base + STM32F7_I2C_CR1, STM32_I2C_CR1_FMP); + } else { + if (i2c_dev->fmp_sreg == i2c_dev->fmp_creg) + ret = regmap_update_bits(i2c_dev->regmap, i2c_dev->fmp_sreg, + i2c_dev->fmp_mask, enable ? i2c_dev->fmp_mask : 0); + else + ret = regmap_write(i2c_dev->regmap, + enable ? i2c_dev->fmp_sreg : i2c_dev->fmp_creg, + i2c_dev->fmp_mask); + } return ret; } @@ -2162,6 +2173,13 @@ static int stm32f7_i2c_probe(struct platform_device *pdev) if (!i2c_dev) return -ENOMEM; + setup = of_device_get_match_data(&pdev->dev); + if (!setup) { + dev_err(&pdev->dev, "Can't get device data\n"); + return -ENODEV; + } + i2c_dev->setup = *setup; + i2c_dev->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res); if (IS_ERR(i2c_dev->base)) return PTR_ERR(i2c_dev->base); @@ -2171,10 +2189,6 @@ static int stm32f7_i2c_probe(struct platform_device *pdev) if (irq_event < 0) return irq_event; - irq_error = platform_get_irq(pdev, 1); - if (irq_error < 0) - return irq_error; - i2c_dev->wakeup_src = of_property_read_bool(pdev->dev.of_node, "wakeup-source"); @@ -2199,26 +2213,22 @@ static int stm32f7_i2c_probe(struct platform_device *pdev) stm32f7_i2c_isr_event_thread, IRQF_ONESHOT, pdev->name, i2c_dev); - if (ret) { - dev_err(&pdev->dev, "Failed to request irq event %i\n", - irq_event); - return ret; - } - - ret = devm_request_irq(&pdev->dev, irq_error, stm32f7_i2c_isr_error, 0, - pdev->name, i2c_dev); - if (ret) { - dev_err(&pdev->dev, "Failed to request irq error %i\n", - irq_error); - return ret; - } - - setup = of_device_get_match_data(&pdev->dev); - if (!setup) { - dev_err(&pdev->dev, "Can't get device data\n"); - return -ENODEV; + if (ret) + return dev_err_probe(&pdev->dev, ret, "Failed to request irq event\n"); + + if (!i2c_dev->setup.single_it_line) { + irq_error 
= platform_get_irq(pdev, 1); + if (irq_error < 0) + return irq_error; + + ret = devm_request_threaded_irq(&pdev->dev, irq_error, + NULL, + stm32f7_i2c_isr_error_thread, + IRQF_ONESHOT, + pdev->name, i2c_dev); + if (ret) + return dev_err_probe(&pdev->dev, ret, "Failed to request irq error\n"); } - i2c_dev->setup = *setup; ret = stm32f7_i2c_setup_timing(i2c_dev, &i2c_dev->setup); if (ret) @@ -2226,9 +2236,12 @@ static int stm32f7_i2c_probe(struct platform_device *pdev) /* Setup Fast mode plus if necessary */ if (i2c_dev->bus_rate > I2C_MAX_FAST_MODE_FREQ) { - ret = stm32f7_i2c_setup_fm_plus_bits(pdev, i2c_dev); - if (ret) - return ret; + if (!i2c_dev->setup.fmp_cr1_bit) { + ret = stm32f7_i2c_setup_fm_plus_bits(pdev, i2c_dev); + if (ret) + return ret; + } + ret = stm32f7_i2c_write_fm_plus_bits(i2c_dev, true); if (ret) return ret; @@ -2507,6 +2520,7 @@ static const struct of_device_id stm32f7_i2c_match[] = { { .compatible = "st,stm32f7-i2c", .data = &stm32f7_setup}, { .compatible = "st,stm32mp15-i2c", .data = &stm32mp15_setup}, { .compatible = "st,stm32mp13-i2c", .data = &stm32mp13_setup}, + { .compatible = "st,stm32mp25-i2c", .data = &stm32mp25_setup}, {}, }; MODULE_DEVICE_TABLE(of, stm32f7_i2c_match); diff --git a/drivers/i2c/busses/i2c-via.c b/drivers/i2c/busses/i2c-via.c index ad4f09c7f0275057854a004c7b69e3b57524d2d4..7ed29992a97ff87a3f31c296ced8dc9b3df5a08f 100644 --- a/drivers/i2c/busses/i2c-via.c +++ b/drivers/i2c/busses/i2c-via.c @@ -70,7 +70,7 @@ static struct i2c_algo_bit_data bit_data = { static struct i2c_adapter vt586b_adapter = { .owner = THIS_MODULE, - .class = I2C_CLASS_HWMON | I2C_CLASS_SPD, + .class = I2C_CLASS_HWMON, .name = "VIA i2c", .algo_data = &bit_data, }; diff --git a/drivers/i2c/busses/i2c-viapro.c b/drivers/i2c/busses/i2c-viapro.c index 970ccdcbb8896232e79606d1ea8078593f6f1f2a..2cc7bba3b8bf8d2f64c4ad1098927e2a2f5a269f 100644 --- a/drivers/i2c/busses/i2c-viapro.c +++ b/drivers/i2c/busses/i2c-viapro.c @@ -304,7 +304,7 @@ static const struct 
i2c_algorithm smbus_algorithm = { static struct i2c_adapter vt596_adapter = { .owner = THIS_MODULE, - .class = I2C_CLASS_HWMON | I2C_CLASS_SPD, + .class = I2C_CLASS_HWMON, .algo = &smbus_algorithm, }; diff --git a/drivers/i2c/busses/i2c-wmt.c b/drivers/i2c/busses/i2c-wmt.c index 76118abc6e104d083047714e9cb036116ab9ef85..ec2a8da134e56d01be06588551db26bca47caef4 100644 --- a/drivers/i2c/busses/i2c-wmt.c +++ b/drivers/i2c/busses/i2c-wmt.c @@ -74,9 +74,6 @@ #define MCR_APB_96M 7 #define MCR_APB_166M 12 -#define I2C_MODE_STANDARD 0 -#define I2C_MODE_FAST 1 - #define WMT_I2C_TIMEOUT (msecs_to_jiffies(1000)) struct wmt_i2c_dev { @@ -85,7 +82,7 @@ struct wmt_i2c_dev { struct device *dev; void __iomem *base; struct clk *clk; - int mode; + u16 tcr; int irq; u16 cmd_status; }; @@ -109,6 +106,12 @@ static int wmt_i2c_wait_bus_not_busy(struct wmt_i2c_dev *i2c_dev) static int wmt_check_status(struct wmt_i2c_dev *i2c_dev) { int ret = 0; + unsigned long wait_result; + + wait_result = wait_for_completion_timeout(&i2c_dev->complete, + msecs_to_jiffies(500)); + if (!wait_result) + return -ETIMEDOUT; if (i2c_dev->cmd_status & ISR_NACK_ADDR) ret = -EIO; @@ -119,21 +122,13 @@ static int wmt_check_status(struct wmt_i2c_dev *i2c_dev) return ret; } -static int wmt_i2c_write(struct i2c_adapter *adap, struct i2c_msg *pmsg, +static int wmt_i2c_write(struct wmt_i2c_dev *i2c_dev, struct i2c_msg *pmsg, int last) { - struct wmt_i2c_dev *i2c_dev = i2c_get_adapdata(adap); - u16 val, tcr_val; + u16 val, tcr_val = i2c_dev->tcr; int ret; - unsigned long wait_result; int xfer_len = 0; - if (!(pmsg->flags & I2C_M_NOSTART)) { - ret = wmt_i2c_wait_bus_not_busy(i2c_dev); - if (ret < 0) - return ret; - } - if (pmsg->len == 0) { /* * We still need to run through the while (..) 
once, so @@ -148,20 +143,12 @@ static int wmt_i2c_write(struct i2c_adapter *adap, struct i2c_msg *pmsg, if (!(pmsg->flags & I2C_M_NOSTART)) { val = readw(i2c_dev->base + REG_CR); val &= ~CR_TX_END; - writew(val, i2c_dev->base + REG_CR); - - val = readw(i2c_dev->base + REG_CR); val |= CR_CPU_RDY; writew(val, i2c_dev->base + REG_CR); } reinit_completion(&i2c_dev->complete); - if (i2c_dev->mode == I2C_MODE_STANDARD) - tcr_val = TCR_STANDARD_MODE; - else - tcr_val = TCR_FAST_MODE; - tcr_val |= (TCR_MASTER_WRITE | (pmsg->addr & TCR_SLAVE_ADDR_MASK)); writew(tcr_val, i2c_dev->base + REG_TCR); @@ -173,12 +160,6 @@ static int wmt_i2c_write(struct i2c_adapter *adap, struct i2c_msg *pmsg, } while (xfer_len < pmsg->len) { - wait_result = wait_for_completion_timeout(&i2c_dev->complete, - msecs_to_jiffies(500)); - - if (wait_result == 0) - return -ETIMEDOUT; - ret = wmt_check_status(i2c_dev); if (ret) return ret; @@ -210,47 +191,24 @@ static int wmt_i2c_write(struct i2c_adapter *adap, struct i2c_msg *pmsg, return 0; } -static int wmt_i2c_read(struct i2c_adapter *adap, struct i2c_msg *pmsg, - int last) +static int wmt_i2c_read(struct wmt_i2c_dev *i2c_dev, struct i2c_msg *pmsg) { - struct wmt_i2c_dev *i2c_dev = i2c_get_adapdata(adap); - u16 val, tcr_val; + u16 val, tcr_val = i2c_dev->tcr; int ret; - unsigned long wait_result; u32 xfer_len = 0; - if (!(pmsg->flags & I2C_M_NOSTART)) { - ret = wmt_i2c_wait_bus_not_busy(i2c_dev); - if (ret < 0) - return ret; - } - - val = readw(i2c_dev->base + REG_CR); - val &= ~CR_TX_END; - writew(val, i2c_dev->base + REG_CR); - val = readw(i2c_dev->base + REG_CR); - val &= ~CR_TX_NEXT_NO_ACK; - writew(val, i2c_dev->base + REG_CR); + val &= ~(CR_TX_END | CR_TX_NEXT_NO_ACK); - if (!(pmsg->flags & I2C_M_NOSTART)) { - val = readw(i2c_dev->base + REG_CR); + if (!(pmsg->flags & I2C_M_NOSTART)) val |= CR_CPU_RDY; - writew(val, i2c_dev->base + REG_CR); - } - if (pmsg->len == 1) { - val = readw(i2c_dev->base + REG_CR); + if (pmsg->len == 1) val |= 
CR_TX_NEXT_NO_ACK; - writew(val, i2c_dev->base + REG_CR); - } - reinit_completion(&i2c_dev->complete); + writew(val, i2c_dev->base + REG_CR); - if (i2c_dev->mode == I2C_MODE_STANDARD) - tcr_val = TCR_STANDARD_MODE; - else - tcr_val = TCR_FAST_MODE; + reinit_completion(&i2c_dev->complete); tcr_val |= TCR_MASTER_READ | (pmsg->addr & TCR_SLAVE_ADDR_MASK); @@ -263,12 +221,6 @@ static int wmt_i2c_read(struct i2c_adapter *adap, struct i2c_msg *pmsg, } while (xfer_len < pmsg->len) { - wait_result = wait_for_completion_timeout(&i2c_dev->complete, - msecs_to_jiffies(500)); - - if (!wait_result) - return -ETIMEDOUT; - ret = wmt_check_status(i2c_dev); if (ret) return ret; @@ -276,15 +228,10 @@ static int wmt_i2c_read(struct i2c_adapter *adap, struct i2c_msg *pmsg, pmsg->buf[xfer_len] = readw(i2c_dev->base + REG_CDR) >> 8; xfer_len++; - if (xfer_len == pmsg->len - 1) { - val = readw(i2c_dev->base + REG_CR); - val |= (CR_TX_NEXT_NO_ACK | CR_CPU_RDY); - writew(val, i2c_dev->base + REG_CR); - } else { - val = readw(i2c_dev->base + REG_CR); - val |= CR_CPU_RDY; - writew(val, i2c_dev->base + REG_CR); - } + val = readw(i2c_dev->base + REG_CR) | CR_CPU_RDY; + if (xfer_len == pmsg->len - 1) + val |= CR_TX_NEXT_NO_ACK; + writew(val, i2c_dev->base + REG_CR); } return 0; @@ -295,17 +242,22 @@ static int wmt_i2c_xfer(struct i2c_adapter *adap, int num) { struct i2c_msg *pmsg; - int i, is_last; + int i; int ret = 0; + struct wmt_i2c_dev *i2c_dev = i2c_get_adapdata(adap); for (i = 0; ret >= 0 && i < num; i++) { - is_last = ((i + 1) == num); - pmsg = &msgs[i]; + if (!(pmsg->flags & I2C_M_NOSTART)) { + ret = wmt_i2c_wait_bus_not_busy(i2c_dev); + if (ret < 0) + return ret; + } + if (pmsg->flags & I2C_M_RD) - ret = wmt_i2c_read(adap, pmsg, is_last); + ret = wmt_i2c_read(i2c_dev, pmsg); else - ret = wmt_i2c_write(adap, pmsg, is_last); + ret = wmt_i2c_write(i2c_dev, pmsg, (i + 1) == num); } return (ret < 0) ? 
ret : i; @@ -359,10 +311,10 @@ static int wmt_i2c_reset_hardware(struct wmt_i2c_dev *i2c_dev) readw(i2c_dev->base + REG_CSR); /* read clear */ writew(ISR_WRITE_ALL, i2c_dev->base + REG_ISR); - if (i2c_dev->mode == I2C_MODE_STANDARD) - writew(SCL_TIMEOUT(128) | TR_STD, i2c_dev->base + REG_TR); - else + if (i2c_dev->tcr == TCR_FAST_MODE) writew(SCL_TIMEOUT(128) | TR_HS, i2c_dev->base + REG_TR); + else + writew(SCL_TIMEOUT(128) | TR_STD, i2c_dev->base + REG_TR); return 0; } @@ -395,10 +347,9 @@ static int wmt_i2c_probe(struct platform_device *pdev) return PTR_ERR(i2c_dev->clk); } - i2c_dev->mode = I2C_MODE_STANDARD; err = of_property_read_u32(np, "clock-frequency", &clk_rate); if (!err && (clk_rate == I2C_MAX_FAST_MODE_FREQ)) - i2c_dev->mode = I2C_MODE_FAST; + i2c_dev->tcr = TCR_FAST_MODE; i2c_dev->dev = &pdev->dev; diff --git a/drivers/i2c/busses/scx200_acb.c b/drivers/i2c/busses/scx200_acb.c index 83c1db610f54b8c6d64139466e777dccc95c330f..3648382b885a4e0fc1b4248224a70c4a7fbd18eb 100644 --- a/drivers/i2c/busses/scx200_acb.c +++ b/drivers/i2c/busses/scx200_acb.c @@ -427,7 +427,7 @@ static struct scx200_acb_iface *scx200_create_iface(const char *text, snprintf(adapter->name, sizeof(adapter->name), "%s ACB%d", text, index); adapter->owner = THIS_MODULE; adapter->algo = &scx200_acb_algorithm; - adapter->class = I2C_CLASS_HWMON | I2C_CLASS_SPD; + adapter->class = I2C_CLASS_HWMON; adapter->dev.parent = dev; mutex_init(&iface->mutex); diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index eac90a3cf61a4b7740108974ab114105cb74ae70..3bd48d4b6318fe1fe83e3718c59713f354ff9878 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -67,6 +68,8 @@ static int i2c_detect(struct i2c_adapter *adapter, struct i2c_driver *driver); static DEFINE_STATIC_KEY_FALSE(i2c_trace_msg_key); static bool is_registered; +static struct dentry *i2c_debugfs_root; + int 
i2c_transfer_trace_reg(void) { static_branch_inc(&i2c_trace_msg_key); @@ -689,7 +692,7 @@ static struct attribute *i2c_dev_attrs[] = { }; ATTRIBUTE_GROUPS(i2c_dev); -struct bus_type i2c_bus_type = { +const struct bus_type i2c_bus_type = { .name = "i2c", .match = i2c_device_match, .probe = i2c_device_probe, @@ -1524,6 +1527,8 @@ static int i2c_register_adapter(struct i2c_adapter *adap) goto out_list; } + adap->debugfs = debugfs_create_dir(dev_name(&adap->dev), i2c_debugfs_root); + res = i2c_setup_smbus_alert(adap); if (res) goto out_reg; @@ -1563,6 +1568,7 @@ static int i2c_register_adapter(struct i2c_adapter *adap) return 0; out_reg: + debugfs_remove_recursive(adap->debugfs); init_completion(&adap->dev_released); device_unregister(&adap->dev); wait_for_completion(&adap->dev_released); @@ -1764,6 +1770,8 @@ void i2c_del_adapter(struct i2c_adapter *adap) i2c_host_notify_irq_teardown(adap); + debugfs_remove_recursive(adap->debugfs); + /* wait until all references to the device are gone * * FIXME: This is old code and should ideally be replaced by an @@ -2061,6 +2069,8 @@ static int __init i2c_init(void) is_registered = true; + i2c_debugfs_root = debugfs_create_dir("i2c", NULL); + #ifdef CONFIG_I2C_COMPAT i2c_adapter_compat_class = class_compat_register("i2c-adapter"); if (!i2c_adapter_compat_class) { @@ -2099,6 +2109,7 @@ static void __exit i2c_exit(void) #ifdef CONFIG_I2C_COMPAT class_compat_unregister(i2c_adapter_compat_class); #endif + debugfs_remove_recursive(i2c_debugfs_root); bus_unregister(&i2c_bus_type); tracepoint_synchronize_unregister(); } diff --git a/drivers/i2c/i2c-smbus.c b/drivers/i2c/i2c-smbus.c index 138c3f5e0093a5c2f8ceeb6ddeca991dd70fbad6..74807c6db596d810fffe035268875a61b1074881 100644 --- a/drivers/i2c/i2c-smbus.c +++ b/drivers/i2c/i2c-smbus.c @@ -308,8 +308,8 @@ EXPORT_SYMBOL_GPL(i2c_free_slave_host_notify_device); * target systems are the same. 
* Restrictions to automatic SPD instantiation: * - Only works if all filled slots have the same memory type - * - Only works for DDR2, DDR3 and DDR4 for now - * - Only works on systems with 1 to 4 memory slots + * - Only works for DDR, DDR2, DDR3 and DDR4 for now + * - Only works on systems with 1 to 8 memory slots */ #if IS_ENABLED(CONFIG_DMI) void i2c_register_spd(struct i2c_adapter *adap) @@ -354,9 +354,9 @@ void i2c_register_spd(struct i2c_adapter *adap) dev_info(&adap->dev, "%d/%d memory slots populated (from DMI)\n", dimm_count, slot_count); - if (slot_count > 4) { + if (slot_count > 8) { dev_warn(&adap->dev, - "Systems with more than 4 memory slots not supported yet, not instantiating SPD\n"); + "Systems with more than 8 memory slots not supported yet, not instantiating SPD\n"); return; } diff --git a/drivers/i2c/i2c-stub.c b/drivers/i2c/i2c-stub.c index d642cad219d9e65b561659cfb895af11c6cba41b..09e7b7bf4c5f71b586372ef43d83ec43f391e573 100644 --- a/drivers/i2c/i2c-stub.c +++ b/drivers/i2c/i2c-stub.c @@ -308,7 +308,7 @@ static const struct i2c_algorithm smbus_algorithm = { static struct i2c_adapter stub_adapter = { .owner = THIS_MODULE, - .class = I2C_CLASS_HWMON | I2C_CLASS_SPD, + .class = I2C_CLASS_HWMON, .algo = &smbus_algorithm, .name = "SMBus stub driver", }; diff --git a/drivers/i2c/muxes/i2c-mux-reg.c b/drivers/i2c/muxes/i2c-mux-reg.c index 9efc1ed01577b1987f3493c686bd8133eb31fcdb..8489971babd37b55ef794ebd8ec8446d9797acca 100644 --- a/drivers/i2c/muxes/i2c-mux-reg.c +++ b/drivers/i2c/muxes/i2c-mux-reg.c @@ -159,7 +159,6 @@ static int i2c_mux_reg_probe(struct platform_device *pdev) struct regmux *mux; struct i2c_adapter *parent; struct resource *res; - unsigned int class; int i, ret, nr; mux = devm_kzalloc(&pdev->dev, sizeof(*mux), GFP_KERNEL); @@ -213,9 +212,8 @@ static int i2c_mux_reg_probe(struct platform_device *pdev) for (i = 0; i < mux->data.n_values; i++) { nr = mux->data.base_nr ? (mux->data.base_nr + i) : 0; - class = mux->data.classes ? 
mux->data.classes[i] : 0; - ret = i2c_mux_add_adapter(muxc, nr, mux->data.values[i], class); + ret = i2c_mux_add_adapter(muxc, nr, mux->data.values[i], 0); if (ret) goto err_del_mux_adapters; } diff --git a/drivers/input/input.c b/drivers/input/input.c index 8c5fdb0f858ab5102926c0f6858f6a0f022c44b2..f71ea4fb173fdd2950cd6a271e1975e930578ce1 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -1365,8 +1365,8 @@ static ssize_t input_dev_show_##name(struct device *dev, \ { \ struct input_dev *input_dev = to_input_dev(dev); \ \ - return scnprintf(buf, PAGE_SIZE, "%s\n", \ - input_dev->name ? input_dev->name : ""); \ + return sysfs_emit(buf, "%s\n", \ + input_dev->name ? input_dev->name : ""); \ } \ static DEVICE_ATTR(name, S_IRUGO, input_dev_show_##name, NULL) @@ -1458,7 +1458,7 @@ static ssize_t inhibited_show(struct device *dev, { struct input_dev *input_dev = to_input_dev(dev); - return scnprintf(buf, PAGE_SIZE, "%d\n", input_dev->inhibited); + return sysfs_emit(buf, "%d\n", input_dev->inhibited); } static ssize_t inhibited_store(struct device *dev, @@ -1505,7 +1505,7 @@ static ssize_t input_dev_show_id_##name(struct device *dev, \ char *buf) \ { \ struct input_dev *input_dev = to_input_dev(dev); \ - return scnprintf(buf, PAGE_SIZE, "%04x\n", input_dev->id.name); \ + return sysfs_emit(buf, "%04x\n", input_dev->id.name); \ } \ static DEVICE_ATTR(name, S_IRUGO, input_dev_show_id_##name, NULL) diff --git a/drivers/input/joystick/Kconfig b/drivers/input/joystick/Kconfig index ac6925ce836670ead078f44816e97fc2fe7008ab..7755e5b454d2cb8d1b8ec8295647c4a47d787d8c 100644 --- a/drivers/input/joystick/Kconfig +++ b/drivers/input/joystick/Kconfig @@ -412,4 +412,14 @@ config JOYSTICK_SENSEHAT To compile this driver as a module, choose M here: the module will be called sensehat_joystick. 
+config JOYSTICK_SEESAW + tristate "Adafruit Mini I2C Gamepad with Seesaw" + depends on I2C + select INPUT_SPARSEKMAP + help + Say Y here if you want to use the Adafruit Mini I2C Gamepad. + + To compile this driver as a module, choose M here: the module will be + called adafruit-seesaw. + endif diff --git a/drivers/input/joystick/Makefile b/drivers/input/joystick/Makefile index 3937535f00981e52052bea22e449326c3c88716e..9976f596a92085bda1bb28d7ca33e934f1be0ba3 100644 --- a/drivers/input/joystick/Makefile +++ b/drivers/input/joystick/Makefile @@ -28,6 +28,7 @@ obj-$(CONFIG_JOYSTICK_N64) += n64joy.o obj-$(CONFIG_JOYSTICK_PSXPAD_SPI) += psxpad-spi.o obj-$(CONFIG_JOYSTICK_PXRC) += pxrc.o obj-$(CONFIG_JOYSTICK_QWIIC) += qwiic-joystick.o +obj-$(CONFIG_JOYSTICK_SEESAW) += adafruit-seesaw.o obj-$(CONFIG_JOYSTICK_SENSEHAT) += sensehat-joystick.o obj-$(CONFIG_JOYSTICK_SIDEWINDER) += sidewinder.o obj-$(CONFIG_JOYSTICK_SPACEBALL) += spaceball.o diff --git a/drivers/input/joystick/adafruit-seesaw.c b/drivers/input/joystick/adafruit-seesaw.c new file mode 100644 index 0000000000000000000000000000000000000000..1b9279f024cc6f7bad73b05df5c79a3ebea7e1a3 --- /dev/null +++ b/drivers/input/joystick/adafruit-seesaw.c @@ -0,0 +1,315 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2023 Anshul Dalal + * + * Driver for Adafruit Mini I2C Gamepad + * + * Based on the work of: + * Oleh Kravchenko (Sparkfun Qwiic Joystick driver) + * + * Datasheet: https://cdn-learn.adafruit.com/downloads/pdf/gamepad-qt.pdf + * Product page: https://www.adafruit.com/product/5743 + * Firmware and hardware sources: https://github.com/adafruit/Adafruit_Seesaw + * + * TODO: + * - Add interrupt support + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define SEESAW_DEVICE_NAME "seesaw-gamepad" + +#define SEESAW_ADC_BASE 0x0900 + +#define SEESAW_GPIO_DIRCLR_BULK 0x0103 +#define SEESAW_GPIO_BULK 0x0104 +#define SEESAW_GPIO_BULK_SET 0x0105 +#define 
SEESAW_GPIO_PULLENSET 0x010b + +#define SEESAW_STATUS_HW_ID 0x0001 +#define SEESAW_STATUS_SWRST 0x007f + +#define SEESAW_ADC_OFFSET 0x07 + +#define SEESAW_BUTTON_A 0x05 +#define SEESAW_BUTTON_B 0x01 +#define SEESAW_BUTTON_X 0x06 +#define SEESAW_BUTTON_Y 0x02 +#define SEESAW_BUTTON_START 0x10 +#define SEESAW_BUTTON_SELECT 0x00 + +#define SEESAW_ANALOG_X 0x0e +#define SEESAW_ANALOG_Y 0x0f + +#define SEESAW_JOYSTICK_MAX_AXIS 1023 +#define SEESAW_JOYSTICK_FUZZ 2 +#define SEESAW_JOYSTICK_FLAT 4 + +#define SEESAW_GAMEPAD_POLL_INTERVAL_MS 16 +#define SEESAW_GAMEPAD_POLL_MIN 8 +#define SEESAW_GAMEPAD_POLL_MAX 32 + +static const unsigned long SEESAW_BUTTON_MASK = + BIT(SEESAW_BUTTON_A) | BIT(SEESAW_BUTTON_B) | BIT(SEESAW_BUTTON_X) | + BIT(SEESAW_BUTTON_Y) | BIT(SEESAW_BUTTON_START) | + BIT(SEESAW_BUTTON_SELECT); + +struct seesaw_gamepad { + struct input_dev *input_dev; + struct i2c_client *i2c_client; +}; + +struct seesaw_data { + u16 x; + u16 y; + u32 button_state; +}; + +static const struct key_entry seesaw_buttons_new[] = { + { KE_KEY, SEESAW_BUTTON_A, .keycode = BTN_SOUTH }, + { KE_KEY, SEESAW_BUTTON_B, .keycode = BTN_EAST }, + { KE_KEY, SEESAW_BUTTON_X, .keycode = BTN_NORTH }, + { KE_KEY, SEESAW_BUTTON_Y, .keycode = BTN_WEST }, + { KE_KEY, SEESAW_BUTTON_START, .keycode = BTN_START }, + { KE_KEY, SEESAW_BUTTON_SELECT, .keycode = BTN_SELECT }, + { KE_END, 0 } +}; + +static int seesaw_register_read(struct i2c_client *client, u16 reg, void *buf, + int count) +{ + __be16 register_buf = cpu_to_be16(reg); + struct i2c_msg message_buf[2] = { + { + .addr = client->addr, + .flags = client->flags, + .len = sizeof(register_buf), + .buf = (u8 *)&register_buf, + }, + { + .addr = client->addr, + .flags = client->flags | I2C_M_RD, + .len = count, + .buf = (u8 *)buf, + }, + }; + int ret; + + ret = i2c_transfer(client->adapter, message_buf, + ARRAY_SIZE(message_buf)); + if (ret < 0) + return ret; + + return 0; +} + +static int seesaw_register_write_u8(struct i2c_client *client, u16 reg, +
u8 value) +{ + u8 write_buf[sizeof(reg) + sizeof(value)]; + int ret; + + put_unaligned_be16(reg, write_buf); + write_buf[sizeof(reg)] = value; + + ret = i2c_master_send(client, write_buf, sizeof(write_buf)); + if (ret < 0) + return ret; + + return 0; +} + +static int seesaw_register_write_u32(struct i2c_client *client, u16 reg, + u32 value) +{ + u8 write_buf[sizeof(reg) + sizeof(value)]; + int ret; + + put_unaligned_be16(reg, write_buf); + put_unaligned_be32(value, write_buf + sizeof(reg)); + ret = i2c_master_send(client, write_buf, sizeof(write_buf)); + if (ret < 0) + return ret; + + return 0; +} + +static int seesaw_read_data(struct i2c_client *client, struct seesaw_data *data) +{ + __be16 adc_data; + __be32 read_buf; + int err; + + err = seesaw_register_read(client, SEESAW_GPIO_BULK, + &read_buf, sizeof(read_buf)); + if (err) + return err; + + data->button_state = ~be32_to_cpu(read_buf); + + err = seesaw_register_read(client, + SEESAW_ADC_BASE | + (SEESAW_ADC_OFFSET + SEESAW_ANALOG_X), + &adc_data, sizeof(adc_data)); + if (err) + return err; + /* + * ADC reads left as max and right as 0, must be reversed since kernel + * expects reports in opposite order. 
+ */ + data->x = SEESAW_JOYSTICK_MAX_AXIS - be16_to_cpu(adc_data); + + err = seesaw_register_read(client, + SEESAW_ADC_BASE | + (SEESAW_ADC_OFFSET + SEESAW_ANALOG_Y), + &adc_data, sizeof(adc_data)); + if (err) + return err; + + data->y = be16_to_cpu(adc_data); + + return 0; +} + +static void seesaw_poll(struct input_dev *input) +{ + struct seesaw_gamepad *private = input_get_drvdata(input); + struct seesaw_data data; + int err, i; + + err = seesaw_read_data(private->i2c_client, &data); + if (err) { + dev_err_ratelimited(&input->dev, + "failed to read joystick state: %d\n", err); + return; + } + + input_report_abs(input, ABS_X, data.x); + input_report_abs(input, ABS_Y, data.y); + + for_each_set_bit(i, &SEESAW_BUTTON_MASK, + BITS_PER_TYPE(SEESAW_BUTTON_MASK)) { + if (!sparse_keymap_report_event(input, i, + data.button_state & BIT(i), + false)) + dev_err_ratelimited(&input->dev, + "failed to report keymap event"); + } + + input_sync(input); +} + +static int seesaw_probe(struct i2c_client *client) +{ + struct seesaw_gamepad *seesaw; + u8 hardware_id; + int err; + + err = seesaw_register_write_u8(client, SEESAW_STATUS_SWRST, 0xFF); + if (err) + return err; + + /* Wait for the registers to reset before proceeding */ + usleep_range(10000, 15000); + + seesaw = devm_kzalloc(&client->dev, sizeof(*seesaw), GFP_KERNEL); + if (!seesaw) + return -ENOMEM; + + err = seesaw_register_read(client, SEESAW_STATUS_HW_ID, + &hardware_id, sizeof(hardware_id)); + if (err) + return err; + + dev_dbg(&client->dev, "Adafruit Seesaw Gamepad, Hardware ID: %02x\n", + hardware_id); + + /* Set Pin Mode to input and enable pull-up resistors */ + err = seesaw_register_write_u32(client, SEESAW_GPIO_DIRCLR_BULK, + SEESAW_BUTTON_MASK); + if (err) + return err; + err = seesaw_register_write_u32(client, SEESAW_GPIO_PULLENSET, + SEESAW_BUTTON_MASK); + if (err) + return err; + err = seesaw_register_write_u32(client, SEESAW_GPIO_BULK_SET, + SEESAW_BUTTON_MASK); + if (err) + return err; + + seesaw->i2c_client 
= client; + seesaw->input_dev = devm_input_allocate_device(&client->dev); + if (!seesaw->input_dev) + return -ENOMEM; + + seesaw->input_dev->id.bustype = BUS_I2C; + seesaw->input_dev->name = "Adafruit Seesaw Gamepad"; + seesaw->input_dev->phys = "i2c/" SEESAW_DEVICE_NAME; + input_set_drvdata(seesaw->input_dev, seesaw); + input_set_abs_params(seesaw->input_dev, ABS_X, + 0, SEESAW_JOYSTICK_MAX_AXIS, + SEESAW_JOYSTICK_FUZZ, SEESAW_JOYSTICK_FLAT); + input_set_abs_params(seesaw->input_dev, ABS_Y, + 0, SEESAW_JOYSTICK_MAX_AXIS, + SEESAW_JOYSTICK_FUZZ, SEESAW_JOYSTICK_FLAT); + + err = sparse_keymap_setup(seesaw->input_dev, seesaw_buttons_new, NULL); + if (err) { + dev_err(&client->dev, + "failed to set up input device keymap: %d\n", err); + return err; + } + + err = input_setup_polling(seesaw->input_dev, seesaw_poll); + if (err) { + dev_err(&client->dev, "failed to set up polling: %d\n", err); + return err; + } + + input_set_poll_interval(seesaw->input_dev, + SEESAW_GAMEPAD_POLL_INTERVAL_MS); + input_set_max_poll_interval(seesaw->input_dev, SEESAW_GAMEPAD_POLL_MAX); + input_set_min_poll_interval(seesaw->input_dev, SEESAW_GAMEPAD_POLL_MIN); + + err = input_register_device(seesaw->input_dev); + if (err) { + dev_err(&client->dev, "failed to register joystick: %d\n", err); + return err; + } + + return 0; +} + +static const struct i2c_device_id seesaw_id_table[] = { + { SEESAW_DEVICE_NAME }, + { /* Sentinel */ } +}; +MODULE_DEVICE_TABLE(i2c, seesaw_id_table); + +static const struct of_device_id seesaw_of_table[] = { + { .compatible = "adafruit,seesaw-gamepad"}, + { /* Sentinel */ } +}; +MODULE_DEVICE_TABLE(of, seesaw_of_table); + +static struct i2c_driver seesaw_driver = { + .driver = { + .name = SEESAW_DEVICE_NAME, + .of_match_table = seesaw_of_table, + }, + .id_table = seesaw_id_table, + .probe = seesaw_probe, +}; +module_i2c_driver(seesaw_driver); + +MODULE_AUTHOR("Anshul Dalal "); +MODULE_DESCRIPTION("Adafruit Mini I2C Gamepad driver"); +MODULE_LICENSE("GPL"); diff --git 
a/drivers/input/joystick/as5011.c b/drivers/input/joystick/as5011.c index bf8b1cc0ea9c7681aab5feb4d968607866c6f70f..f1822c19a289d95832ca1390c205853da40d4fff 100644 --- a/drivers/input/joystick/as5011.c +++ b/drivers/input/joystick/as5011.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include @@ -61,7 +61,7 @@ MODULE_LICENSE("GPL"); struct as5011_device { struct input_dev *input_dev; struct i2c_client *i2c_client; - unsigned int button_gpio; + struct gpio_desc *button_gpiod; unsigned int button_irq; unsigned int axis_irq; }; @@ -114,7 +114,7 @@ static int as5011_i2c_read(struct i2c_client *client, static irqreturn_t as5011_button_interrupt(int irq, void *dev_id) { struct as5011_device *as5011 = dev_id; - int val = gpio_get_value_cansleep(as5011->button_gpio); + int val = gpiod_get_value_cansleep(as5011->button_gpiod); input_report_key(as5011->input_dev, BTN_JOYSTICK, !val); input_sync(as5011->input_dev); @@ -248,7 +248,6 @@ static int as5011_probe(struct i2c_client *client) as5011->i2c_client = client; as5011->input_dev = input_dev; - as5011->button_gpio = plat_data->button_gpio; as5011->axis_irq = plat_data->axis_irq; input_dev->name = "Austria Microsystem as5011 joystick"; @@ -262,18 +261,20 @@ static int as5011_probe(struct i2c_client *client) input_set_abs_params(as5011->input_dev, ABS_Y, AS5011_MIN_AXIS, AS5011_MAX_AXIS, AS5011_FUZZ, AS5011_FLAT); - error = gpio_request(as5011->button_gpio, "AS5011 button"); - if (error < 0) { - dev_err(&client->dev, "Failed to request button gpio\n"); + as5011->button_gpiod = devm_gpiod_get(&client->dev, NULL, GPIOD_IN); + if (IS_ERR(as5011->button_gpiod)) { + error = PTR_ERR(as5011->button_gpiod); + dev_err(&client->dev, "Failed to request button GPIO\n"); goto err_free_mem; } + gpiod_set_consumer_name(as5011->button_gpiod, "AS5011 button"); - irq = gpio_to_irq(as5011->button_gpio); + irq = gpiod_to_irq(as5011->button_gpiod); if (irq < 0) { dev_err(&client->dev, "Failed to get irq number 
for button gpio\n"); error = irq; - goto err_free_button_gpio; + goto err_free_mem; } as5011->button_irq = irq; @@ -286,7 +287,7 @@ static int as5011_probe(struct i2c_client *client) if (error < 0) { dev_err(&client->dev, "Can't allocate button irq %d\n", as5011->button_irq); - goto err_free_button_gpio; + goto err_free_mem; } error = as5011_configure_chip(as5011, plat_data); @@ -317,8 +318,6 @@ err_free_axis_irq: free_irq(as5011->axis_irq, as5011); err_free_button_irq: free_irq(as5011->button_irq, as5011); -err_free_button_gpio: - gpio_free(as5011->button_gpio); err_free_mem: input_free_device(input_dev); kfree(as5011); @@ -332,7 +331,6 @@ static void as5011_remove(struct i2c_client *client) free_irq(as5011->axis_irq, as5011); free_irq(as5011->button_irq, as5011); - gpio_free(as5011->button_gpio); input_unregister_device(as5011->input_dev); kfree(as5011); diff --git a/drivers/input/joystick/pxrc.c b/drivers/input/joystick/pxrc.c index ea2bf5951d67716d332c6a8b95d6c2d23dd7b88e..52d9eab667b7acbd9d355af7ee4755d12100a806 100644 --- a/drivers/input/joystick/pxrc.c +++ b/drivers/input/joystick/pxrc.c @@ -5,15 +5,17 @@ * Copyright (C) 2018 Marcus Folkesson */ -#include +#include #include -#include +#include +#include #include +#include +#include #include + #include #include -#include -#include #define PXRC_VENDOR_ID 0x1781 #define PXRC_PRODUCT_ID 0x0898 @@ -81,33 +83,28 @@ exit: static int pxrc_open(struct input_dev *input) { struct pxrc *pxrc = input_get_drvdata(input); - int retval; + int error; - mutex_lock(&pxrc->pm_mutex); - retval = usb_submit_urb(pxrc->urb, GFP_KERNEL); - if (retval) { + guard(mutex)(&pxrc->pm_mutex); + error = usb_submit_urb(pxrc->urb, GFP_KERNEL); + if (error) { dev_err(&pxrc->intf->dev, "%s - usb_submit_urb failed, error: %d\n", - __func__, retval); - retval = -EIO; - goto out; + __func__, error); + return -EIO; } pxrc->is_open = true; - -out: - mutex_unlock(&pxrc->pm_mutex); - return retval; + return 0; } static void pxrc_close(struct input_dev 
*input) { struct pxrc *pxrc = input_get_drvdata(input); - mutex_lock(&pxrc->pm_mutex); + guard(mutex)(&pxrc->pm_mutex); usb_kill_urb(pxrc->urb); pxrc->is_open = false; - mutex_unlock(&pxrc->pm_mutex); } static void pxrc_free_urb(void *_pxrc) @@ -208,10 +205,9 @@ static int pxrc_suspend(struct usb_interface *intf, pm_message_t message) { struct pxrc *pxrc = usb_get_intfdata(intf); - mutex_lock(&pxrc->pm_mutex); + guard(mutex)(&pxrc->pm_mutex); if (pxrc->is_open) usb_kill_urb(pxrc->urb); - mutex_unlock(&pxrc->pm_mutex); return 0; } @@ -219,14 +215,12 @@ static int pxrc_suspend(struct usb_interface *intf, pm_message_t message) static int pxrc_resume(struct usb_interface *intf) { struct pxrc *pxrc = usb_get_intfdata(intf); - int retval = 0; - mutex_lock(&pxrc->pm_mutex); + guard(mutex)(&pxrc->pm_mutex); if (pxrc->is_open && usb_submit_urb(pxrc->urb, GFP_KERNEL) < 0) - retval = -EIO; + return -EIO; - mutex_unlock(&pxrc->pm_mutex); - return retval; + return 0; } static int pxrc_pre_reset(struct usb_interface *intf) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index e2c1848182de9a44683a49f673d0ff3ff3a41999..b1244d7df6cc9e097a11257f9bb530b8954e3c12 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -1670,7 +1670,7 @@ static int xpad_led_probe(struct usb_xpad *xpad) if (!led) return -ENOMEM; - xpad->pad_nr = ida_simple_get(&xpad_pad_seq, 0, 0, GFP_KERNEL); + xpad->pad_nr = ida_alloc(&xpad_pad_seq, GFP_KERNEL); if (xpad->pad_nr < 0) { error = xpad->pad_nr; goto err_free_mem; @@ -1693,7 +1693,7 @@ static int xpad_led_probe(struct usb_xpad *xpad) return 0; err_free_id: - ida_simple_remove(&xpad_pad_seq, xpad->pad_nr); + ida_free(&xpad_pad_seq, xpad->pad_nr); err_free_mem: kfree(led); xpad->led = NULL; @@ -1706,7 +1706,7 @@ static void xpad_led_disconnect(struct usb_xpad *xpad) if (xpad_led) { led_classdev_unregister(&xpad_led->led_cdev); - ida_simple_remove(&xpad_pad_seq, xpad->pad_nr); + ida_free(&xpad_pad_seq, 
xpad->pad_nr); kfree(xpad_led); } } diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c index 786f00f6b7fd8be3e876598d30a2ba8e6615ce10..13ef6284223da30940e5a37802d04a104d2692f6 100644 --- a/drivers/input/keyboard/atkbd.c +++ b/drivers/input/keyboard/atkbd.c @@ -791,9 +791,9 @@ static bool atkbd_is_portable_device(void) * not work. So in this case simply assume a keyboard is connected to avoid * confusing some laptop keyboards. * - * Skipping ATKBD_CMD_GETID ends up using a fake keyboard id. Using a fake id is - * ok in translated mode, only atkbd_select_set() checks atkbd->id and in - * translated mode that is a no-op. + * Skipping ATKBD_CMD_GETID ends up using a fake keyboard id. Using the standard + * 0xab83 id is ok in translated mode, only atkbd_select_set() checks atkbd->id + * and in translated mode that is a no-op. */ static bool atkbd_skip_getid(struct atkbd *atkbd) { @@ -811,6 +811,7 @@ static int atkbd_probe(struct atkbd *atkbd) { struct ps2dev *ps2dev = &atkbd->ps2dev; unsigned char param[2]; + bool skip_getid; /* * Some systems, where the bit-twiddling when testing the io-lines of the @@ -832,7 +833,8 @@ static int atkbd_probe(struct atkbd *atkbd) */ param[0] = param[1] = 0xa5; /* initialize with invalid values */ - if (atkbd_skip_getid(atkbd) || ps2_command(ps2dev, param, ATKBD_CMD_GETID)) { + skip_getid = atkbd_skip_getid(atkbd); + if (skip_getid || ps2_command(ps2dev, param, ATKBD_CMD_GETID)) { /* * If the get ID command was skipped or failed, we check if we can at least set @@ -842,7 +844,7 @@ static int atkbd_probe(struct atkbd *atkbd) param[0] = 0; if (ps2_command(ps2dev, param, ATKBD_CMD_SETLEDS)) return -1; - atkbd->id = 0xabba; + atkbd->id = skip_getid ? 
0xab83 : 0xabba; return 0; } diff --git a/drivers/input/keyboard/cap11xx.c b/drivers/input/keyboard/cap11xx.c index 1b4937dce6725f8a94da1eaec79d1987dc6920a6..52fba9ee7c1d86275efa779f0980d0283ccd5265 100644 --- a/drivers/input/keyboard/cap11xx.c +++ b/drivers/input/keyboard/cap11xx.c @@ -10,10 +10,11 @@ #include #include #include -#include +#include #include #include #include +#include #define CAP11XX_REG_MAIN_CONTROL 0x00 #define CAP11XX_REG_MAIN_CONTROL_GAIN_SHIFT (6) @@ -24,6 +25,7 @@ #define CAP11XX_REG_NOISE_FLAG_STATUS 0x0a #define CAP11XX_REG_SENOR_DELTA(X) (0x10 + (X)) #define CAP11XX_REG_SENSITIVITY_CONTROL 0x1f +#define CAP11XX_REG_SENSITIVITY_CONTROL_DELTA_SENSE_MASK 0x70 #define CAP11XX_REG_CONFIG 0x20 #define CAP11XX_REG_SENSOR_ENABLE 0x21 #define CAP11XX_REG_SENSOR_CONFIG 0x22 @@ -32,6 +34,7 @@ #define CAP11XX_REG_CALIBRATION 0x26 #define CAP11XX_REG_INT_ENABLE 0x27 #define CAP11XX_REG_REPEAT_RATE 0x28 +#define CAP11XX_REG_SIGNAL_GUARD_ENABLE 0x29 #define CAP11XX_REG_MT_CONFIG 0x2a #define CAP11XX_REG_MT_PATTERN_CONFIG 0x2b #define CAP11XX_REG_MT_PATTERN 0x2d @@ -47,6 +50,8 @@ #define CAP11XX_REG_SENSOR_BASE_CNT(X) (0x50 + (X)) #define CAP11XX_REG_LED_POLARITY 0x73 #define CAP11XX_REG_LED_OUTPUT_CONTROL 0x74 +#define CAP11XX_REG_CALIB_SENSITIVITY_CONFIG 0x80 +#define CAP11XX_REG_CALIB_SENSITIVITY_CONFIG2 0x81 #define CAP11XX_REG_LED_DUTY_CYCLE_1 0x90 #define CAP11XX_REG_LED_DUTY_CYCLE_2 0x91 @@ -78,12 +83,20 @@ struct cap11xx_led { struct cap11xx_priv { struct regmap *regmap; + struct device *dev; struct input_dev *idev; + const struct cap11xx_hw_model *model; + u8 id; struct cap11xx_led *leds; int num_leds; /* config */ + u8 analog_gain; + u8 sensitivity_delta_sense; + u8 signal_guard_inputs_mask; + u32 thresholds[8]; + u32 calib_sensitivities[8]; u32 keycodes[]; }; @@ -160,9 +173,6 @@ static bool cap11xx_volatile_reg(struct device *dev, unsigned int reg) case CAP11XX_REG_SENOR_DELTA(3): case CAP11XX_REG_SENOR_DELTA(4): case 
CAP11XX_REG_SENOR_DELTA(5): - case CAP11XX_REG_PRODUCT_ID: - case CAP11XX_REG_MANUFACTURER_ID: - case CAP11XX_REG_REVISION: return true; } @@ -177,10 +187,179 @@ static const struct regmap_config cap11xx_regmap_config = { .reg_defaults = cap11xx_reg_defaults, .num_reg_defaults = ARRAY_SIZE(cap11xx_reg_defaults), - .cache_type = REGCACHE_RBTREE, + .cache_type = REGCACHE_MAPLE, .volatile_reg = cap11xx_volatile_reg, }; +static int cap11xx_write_calib_sens_config_1(struct cap11xx_priv *priv) +{ + return regmap_write(priv->regmap, + CAP11XX_REG_CALIB_SENSITIVITY_CONFIG, + (priv->calib_sensitivities[3] << 6) | + (priv->calib_sensitivities[2] << 4) | + (priv->calib_sensitivities[1] << 2) | + priv->calib_sensitivities[0]); +} + +static int cap11xx_write_calib_sens_config_2(struct cap11xx_priv *priv) +{ + return regmap_write(priv->regmap, + CAP11XX_REG_CALIB_SENSITIVITY_CONFIG2, + (priv->calib_sensitivities[7] << 6) | + (priv->calib_sensitivities[6] << 4) | + (priv->calib_sensitivities[5] << 2) | + priv->calib_sensitivities[4]); +} + +static int cap11xx_init_keys(struct cap11xx_priv *priv) +{ + struct device_node *node = priv->dev->of_node; + struct device *dev = priv->dev; + int i, error; + u32 u32_val; + + if (!node) { + dev_err(dev, "Corresponding DT entry is not available\n"); + return -ENODEV; + } + + if (!of_property_read_u32(node, "microchip,sensor-gain", &u32_val)) { + if (priv->model->no_gain) { + dev_warn(dev, + "This model doesn't support 'sensor-gain'\n"); + } else if (is_power_of_2(u32_val) && u32_val <= 8) { + priv->analog_gain = (u8)ilog2(u32_val); + + error = regmap_update_bits(priv->regmap, + CAP11XX_REG_MAIN_CONTROL, + CAP11XX_REG_MAIN_CONTROL_GAIN_MASK, + priv->analog_gain << CAP11XX_REG_MAIN_CONTROL_GAIN_SHIFT); + if (error) + return error; + } else { + dev_err(dev, "Invalid sensor-gain value %u\n", u32_val); + return -EINVAL; + } + } + + if (of_property_read_bool(node, "microchip,irq-active-high")) { + if (priv->id == CAP1106 || + priv->id == CAP1126 || 
+ priv->id == CAP1188) { + error = regmap_update_bits(priv->regmap, + CAP11XX_REG_CONFIG2, + CAP11XX_REG_CONFIG2_ALT_POL, + 0); + if (error) + return error; + } else { + dev_warn(dev, + "This model doesn't support 'irq-active-high'\n"); + } + } + + if (!of_property_read_u32(node, "microchip,sensitivity-delta-sense", &u32_val)) { + if (!is_power_of_2(u32_val) || u32_val > 128) { + dev_err(dev, "Invalid sensitivity-delta-sense value %u\n", u32_val); + return -EINVAL; + } + + priv->sensitivity_delta_sense = (u8)ilog2(u32_val); + u32_val = ~(FIELD_PREP(CAP11XX_REG_SENSITIVITY_CONTROL_DELTA_SENSE_MASK, + priv->sensitivity_delta_sense)); + + error = regmap_update_bits(priv->regmap, + CAP11XX_REG_SENSITIVITY_CONTROL, + CAP11XX_REG_SENSITIVITY_CONTROL_DELTA_SENSE_MASK, + u32_val); + if (error) + return error; + } + + if (!of_property_read_u32_array(node, "microchip,input-threshold", + priv->thresholds, priv->model->num_channels)) { + for (i = 0; i < priv->model->num_channels; i++) { + if (priv->thresholds[i] > 127) { + dev_err(dev, "Invalid input-threshold value %u\n", + priv->thresholds[i]); + return -EINVAL; + } + + error = regmap_write(priv->regmap, + CAP11XX_REG_SENSOR_THRESH(i), + priv->thresholds[i]); + if (error) + return error; + } + } + + if (!of_property_read_u32_array(node, "microchip,calib-sensitivity", + priv->calib_sensitivities, + priv->model->num_channels)) { + if (priv->id == CAP1293 || priv->id == CAP1298) { + for (i = 0; i < priv->model->num_channels; i++) { + if (!is_power_of_2(priv->calib_sensitivities[i]) || + priv->calib_sensitivities[i] > 4) { + dev_err(dev, "Invalid calib-sensitivity value %u\n", + priv->calib_sensitivities[i]); + return -EINVAL; + } + priv->calib_sensitivities[i] = ilog2(priv->calib_sensitivities[i]); + } + + error = cap11xx_write_calib_sens_config_1(priv); + if (error) + return error; + + if (priv->id == CAP1298) { + error = cap11xx_write_calib_sens_config_2(priv); + if (error) + return error; + } + } else { + dev_warn(dev, + 
"This model doesn't support 'calib-sensitivity'\n"); + } + } + + for (i = 0; i < priv->model->num_channels; i++) { + if (!of_property_read_u32_index(node, "microchip,signal-guard", + i, &u32_val)) { + if (u32_val > 1) + return -EINVAL; + if (u32_val) + priv->signal_guard_inputs_mask |= 0x01 << i; + } + } + + if (priv->signal_guard_inputs_mask) { + if (priv->id == CAP1293 || priv->id == CAP1298) { + error = regmap_write(priv->regmap, + CAP11XX_REG_SIGNAL_GUARD_ENABLE, + priv->signal_guard_inputs_mask); + if (error) + return error; + } else { + dev_warn(dev, + "This model doesn't support 'signal-guard'\n"); + } + } + + /* Provide some useful defaults */ + for (i = 0; i < priv->model->num_channels; i++) + priv->keycodes[i] = KEY_A + i; + + of_property_read_u32_array(node, "linux,keycodes", + priv->keycodes, priv->model->num_channels); + + /* Disable autorepeat. The Linux input system has its own handling. */ + error = regmap_write(priv->regmap, CAP11XX_REG_REPEAT_RATE, 0); + if (error) + return error; + + return 0; +} + static irqreturn_t cap11xx_thread_func(int irq_num, void *data) { struct cap11xx_priv *priv = data; @@ -332,11 +511,9 @@ static int cap11xx_i2c_probe(struct i2c_client *i2c_client) const struct i2c_device_id *id = i2c_client_get_device_id(i2c_client); struct device *dev = &i2c_client->dev; struct cap11xx_priv *priv; - struct device_node *node; const struct cap11xx_hw_model *cap; - int i, error, irq, gain = 0; + int i, error; unsigned int val, rev; - u32 gain32; if (id->driver_data >= ARRAY_SIZE(cap11xx_devices)) { dev_err(dev, "Invalid device ID %lu\n", id->driver_data); @@ -355,6 +532,8 @@ static int cap11xx_i2c_probe(struct i2c_client *i2c_client) if (!priv) return -ENOMEM; + priv->dev = dev; + priv->regmap = devm_regmap_init_i2c(i2c_client, &cap11xx_regmap_config); if (IS_ERR(priv->regmap)) return PTR_ERR(priv->regmap); @@ -384,50 +563,15 @@ static int cap11xx_i2c_probe(struct i2c_client *i2c_client) return error; dev_info(dev, "CAP11XX detected, 
model %s, revision 0x%02x\n", - id->name, rev); - node = dev->of_node; - - if (!of_property_read_u32(node, "microchip,sensor-gain", &gain32)) { - if (cap->no_gain) - dev_warn(dev, - "This version doesn't support sensor gain\n"); - else if (is_power_of_2(gain32) && gain32 <= 8) - gain = ilog2(gain32); - else - dev_err(dev, "Invalid sensor-gain value %d\n", gain32); - } + id->name, rev); - if (id->driver_data == CAP1106 || - id->driver_data == CAP1126 || - id->driver_data == CAP1188) { - if (of_property_read_bool(node, "microchip,irq-active-high")) { - error = regmap_update_bits(priv->regmap, - CAP11XX_REG_CONFIG2, - CAP11XX_REG_CONFIG2_ALT_POL, - 0); - if (error) - return error; - } - } - - /* Provide some useful defaults */ - for (i = 0; i < cap->num_channels; i++) - priv->keycodes[i] = KEY_A + i; - - of_property_read_u32_array(node, "linux,keycodes", - priv->keycodes, cap->num_channels); + priv->model = cap; + priv->id = id->driver_data; - if (!cap->no_gain) { - error = regmap_update_bits(priv->regmap, - CAP11XX_REG_MAIN_CONTROL, - CAP11XX_REG_MAIN_CONTROL_GAIN_MASK, - gain << CAP11XX_REG_MAIN_CONTROL_GAIN_SHIFT); - if (error) - return error; - } + dev_info(dev, "CAP11XX device detected, model %s, revision 0x%02x\n", + id->name, rev); - /* Disable autorepeat. The Linux input system has its own handling. 
*/ - error = regmap_write(priv->regmap, CAP11XX_REG_REPEAT_RATE, 0); + error = cap11xx_init_keys(priv); if (error) return error; @@ -439,7 +583,7 @@ static int cap11xx_i2c_probe(struct i2c_client *i2c_client) priv->idev->id.bustype = BUS_I2C; priv->idev->evbit[0] = BIT_MASK(EV_KEY); - if (of_property_read_bool(node, "autorepeat")) + if (of_property_read_bool(dev->of_node, "autorepeat")) __set_bit(EV_REP, priv->idev->evbit); for (i = 0; i < cap->num_channels; i++) @@ -474,13 +618,8 @@ static int cap11xx_i2c_probe(struct i2c_client *i2c_client) if (error) return error; - irq = irq_of_parse_and_map(node, 0); - if (!irq) { - dev_err(dev, "Unable to parse or map IRQ\n"); - return -ENXIO; - } - - error = devm_request_threaded_irq(dev, irq, NULL, cap11xx_thread_func, + error = devm_request_threaded_irq(dev, i2c_client->irq, + NULL, cap11xx_thread_func, IRQF_ONESHOT, dev_name(dev), priv); if (error) return error; diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c index 2e7c2c046e675f8ae21a6fe668e9b7dfa23c1a9a..9f3bcd41cf67da810c3aad9fe535feea6a89ea48 100644 --- a/drivers/input/keyboard/gpio_keys.c +++ b/drivers/input/keyboard/gpio_keys.c @@ -45,7 +45,9 @@ struct gpio_button_data { unsigned int software_debounce; /* in msecs, for GPIO-driven buttons */ unsigned int irq; + unsigned int wakeirq; unsigned int wakeup_trigger_type; + spinlock_t lock; bool disabled; bool key_pressed; @@ -511,6 +513,7 @@ static int gpio_keys_setup_key(struct platform_device *pdev, struct gpio_button_data *bdata = &ddata->data[idx]; irq_handler_t isr; unsigned long irqflags; + const char *wakedesc; int irq; int error; @@ -575,15 +578,23 @@ static int gpio_keys_setup_key(struct platform_device *pdev, !gpiod_cansleep(bdata->gpiod); } + /* + * If an interrupt was specified, use it instead of the gpio + * interrupt and use the gpio for reading the state. 
A separate + * interrupt may be used as the main button interrupt for + * runtime PM to detect events also in deeper idle states. If a + * dedicated wakeirq is used for system suspend only, see below + * for bdata->wakeirq setup. + */ if (button->irq) { bdata->irq = button->irq; } else { irq = gpiod_to_irq(bdata->gpiod); if (irq < 0) { error = irq; - dev_err(dev, - "Unable to get irq number for GPIO %d, error %d\n", - button->gpio, error); + dev_err_probe(dev, error, + "Unable to get irq number for GPIO %d\n", + button->gpio); return error; } bdata->irq = irq; @@ -672,6 +683,36 @@ static int gpio_keys_setup_key(struct platform_device *pdev, return error; } + if (!button->wakeirq) + return 0; + + /* Use :wakeup suffix like drivers/base/power/wakeirq.c does */ + wakedesc = devm_kasprintf(dev, GFP_KERNEL, "%s:wakeup", desc); + if (!wakedesc) + return -ENOMEM; + + bdata->wakeirq = button->wakeirq; + irqflags |= IRQF_NO_SUSPEND; + + /* + * Wakeirq shares the handler with the main interrupt, it's only + * active during system suspend. See gpio_keys_button_enable_wakeup() + * and gpio_keys_button_disable_wakeup(). + */ + error = devm_request_any_context_irq(dev, bdata->wakeirq, isr, + irqflags, wakedesc, bdata); + if (error < 0) { + dev_err(dev, "Unable to claim wakeirq %d; error %d\n", + bdata->irq, error); + return error; + } + + /* + * Disable wakeirq until suspend. IRQF_NO_AUTOEN won't work if + * IRQF_SHARED was set based on !button->can_disable. 
+ */ + disable_irq(bdata->wakeirq); + return 0; } @@ -728,7 +769,7 @@ gpio_keys_get_devtree_pdata(struct device *dev) struct gpio_keys_platform_data *pdata; struct gpio_keys_button *button; struct fwnode_handle *child; - int nbuttons; + int nbuttons, irq; nbuttons = device_get_child_node_count(dev); if (nbuttons == 0) @@ -750,9 +791,19 @@ gpio_keys_get_devtree_pdata(struct device *dev) device_property_read_string(dev, "label", &pdata->name); device_for_each_child_node(dev, child) { - if (is_of_node(child)) - button->irq = - irq_of_parse_and_map(to_of_node(child), 0); + if (is_of_node(child)) { + irq = of_irq_get_byname(to_of_node(child), "irq"); + if (irq > 0) + button->irq = irq; + + irq = of_irq_get_byname(to_of_node(child), "wakeup"); + if (irq > 0) + button->wakeirq = irq; + + if (!button->irq && !button->wakeirq) + button->irq = + irq_of_parse_and_map(to_of_node(child), 0); + } if (fwnode_property_read_u32(child, "linux,code", &button->code)) { @@ -921,6 +972,11 @@ gpio_keys_button_enable_wakeup(struct gpio_button_data *bdata) } } + if (bdata->wakeirq) { + enable_irq(bdata->wakeirq); + disable_irq(bdata->irq); + } + return 0; } @@ -929,6 +985,11 @@ gpio_keys_button_disable_wakeup(struct gpio_button_data *bdata) { int error; + if (bdata->wakeirq) { + enable_irq(bdata->irq); + disable_irq(bdata->wakeirq); + } + /* * The trigger type is always both edges for gpio-based keys and we do * not support changing wakeup trigger for interrupt-based keys. 
diff --git a/drivers/input/keyboard/omap-keypad.c b/drivers/input/keyboard/omap-keypad.c index 454fb8675657302ca1281c211a2be027fc520163..16f936db73058e948505f4479849b2c753515250 100644 --- a/drivers/input/keyboard/omap-keypad.c +++ b/drivers/input/keyboard/omap-keypad.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include @@ -49,9 +48,6 @@ struct omap_kp { static DECLARE_TASKLET_DISABLED_OLD(kp_tasklet, omap_kp_tasklet); -static unsigned int *row_gpios; -static unsigned int *col_gpios; - static irqreturn_t omap_kp_interrupt(int irq, void *dev_id) { /* disable keyboard interrupt and schedule for handling */ @@ -180,7 +176,7 @@ static int omap_kp_probe(struct platform_device *pdev) struct omap_kp *omap_kp; struct input_dev *input_dev; struct omap_kp_platform_data *pdata = dev_get_platdata(&pdev->dev); - int i, col_idx, row_idx, ret; + int ret; unsigned int row_shift, keycodemax; if (!pdata->rows || !pdata->cols || !pdata->keymap_data) { @@ -209,17 +205,9 @@ static int omap_kp_probe(struct platform_device *pdev) if (pdata->delay) omap_kp->delay = pdata->delay; - if (pdata->row_gpios && pdata->col_gpios) { - row_gpios = pdata->row_gpios; - col_gpios = pdata->col_gpios; - } - omap_kp->rows = pdata->rows; omap_kp->cols = pdata->cols; - col_idx = 0; - row_idx = 0; - timer_setup(&omap_kp->timer, omap_kp_timer, 0); /* get the irq and init timer*/ @@ -276,11 +264,6 @@ err4: err3: device_remove_file(&pdev->dev, &dev_attr_enable); err2: - for (i = row_idx - 1; i >= 0; i--) - gpio_free(row_gpios[i]); - for (i = col_idx - 1; i >= 0; i--) - gpio_free(col_gpios[i]); - kfree(omap_kp); input_free_device(input_dev); diff --git a/drivers/input/keyboard/omap4-keypad.c b/drivers/input/keyboard/omap4-keypad.c index d3f8688fdd9c3ebe03a0c97fb32283f4cd4190ac..040b340995d89d4bae21630f6218f0bd17337684 100644 --- a/drivers/input/keyboard/omap4-keypad.c +++ b/drivers/input/keyboard/omap4-keypad.c @@ -11,6 +11,7 @@ #include #include #include +#include #include 
#include #include @@ -83,6 +84,7 @@ struct omap4_keypad { bool no_autorepeat; u64 keys; unsigned short *keymap; + struct clk *fck; }; static int kbd_readl(struct omap4_keypad *keypad_data, u32 offset) @@ -209,6 +211,10 @@ static int omap4_keypad_open(struct input_dev *input) if (error) return error; + error = clk_prepare_enable(keypad_data->fck); + if (error) + goto out; + disable_irq(keypad_data->irq); kbd_writel(keypad_data, OMAP4_KBD_CTRL, @@ -226,10 +232,11 @@ static int omap4_keypad_open(struct input_dev *input) enable_irq(keypad_data->irq); +out: pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); - return 0; + return error; } static void omap4_keypad_stop(struct omap4_keypad *keypad_data) @@ -258,6 +265,7 @@ static void omap4_keypad_close(struct input_dev *input) disable_irq(keypad_data->irq); omap4_keypad_stop(keypad_data); enable_irq(keypad_data->irq); + clk_disable_unprepare(keypad_data->fck); pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); @@ -356,6 +364,11 @@ static int omap4_keypad_probe(struct platform_device *pdev) } keypad_data->irq = irq; + keypad_data->fck = devm_clk_get(&pdev->dev, "fck"); + if (IS_ERR(keypad_data->fck)) + return dev_err_probe(&pdev->dev, PTR_ERR(keypad_data->fck), + "unable to get fck"); + mutex_init(&keypad_data->lock); platform_set_drvdata(pdev, keypad_data); diff --git a/drivers/input/keyboard/qt1050.c b/drivers/input/keyboard/qt1050.c index 6953097db4456f27cc698ce208841280762f37ca..b51dfcd76038623c8ce321901c8201a05416d347 100644 --- a/drivers/input/keyboard/qt1050.c +++ b/drivers/input/keyboard/qt1050.c @@ -213,7 +213,7 @@ static struct regmap_config qt1050_regmap_config = { .val_bits = 8, .max_register = QT1050_RES_CAL, - .cache_type = REGCACHE_RBTREE, + .cache_type = REGCACHE_MAPLE, .wr_table = &qt1050_writeable_table, .rd_table = &qt1050_readable_table, diff --git a/drivers/input/keyboard/tca6416-keypad.c b/drivers/input/keyboard/tca6416-keypad.c index 
8af59ced1ec2eda1365f5636ac91d48984da29da..677bc4baa5d19521e010f68f388c780917c9c9c9 100644 --- a/drivers/input/keyboard/tca6416-keypad.c +++ b/drivers/input/keyboard/tca6416-keypad.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/input/misc/da9063_onkey.c b/drivers/input/misc/da9063_onkey.c index 74808bae326a710779cdfa455ff5781739bf6948..c338765e0ecd032ab541ad70020cec84bea9c296 100644 --- a/drivers/input/misc/da9063_onkey.c +++ b/drivers/input/misc/da9063_onkey.c @@ -9,11 +9,12 @@ #include #include #include +#include #include #include +#include #include #include -#include #include #include #include @@ -74,13 +75,6 @@ static const struct da906x_chip_config da9062_regs = { .name = "da9062-onkey", }; -static const struct of_device_id da9063_compatible_reg_id_table[] = { - { .compatible = "dlg,da9063-onkey", .data = &da9063_regs }, - { .compatible = "dlg,da9062-onkey", .data = &da9062_regs }, - { }, -}; -MODULE_DEVICE_TABLE(of, da9063_compatible_reg_id_table); - static void da9063_poll_on(struct work_struct *work) { struct da9063_onkey *onkey = container_of(work, @@ -187,56 +181,43 @@ static irqreturn_t da9063_onkey_irq_handler(int irq, void *data) static int da9063_onkey_probe(struct platform_device *pdev) { struct da9063_onkey *onkey; - const struct of_device_id *match; - int irq; int error; - - match = of_match_node(da9063_compatible_reg_id_table, - pdev->dev.of_node); - if (!match) - return -ENXIO; + int irq; onkey = devm_kzalloc(&pdev->dev, sizeof(struct da9063_onkey), GFP_KERNEL); - if (!onkey) { - dev_err(&pdev->dev, "Failed to allocate memory.\n"); + if (!onkey) return -ENOMEM; - } - onkey->config = match->data; + onkey->config = device_get_match_data(&pdev->dev); + if (!onkey->config) + return -ENXIO; + onkey->dev = &pdev->dev; onkey->regmap = dev_get_regmap(pdev->dev.parent, NULL); - if (!onkey->regmap) { - dev_err(&pdev->dev, "Parent regmap unavailable.\n"); - return -ENXIO; - } + if (!onkey->regmap) + 
return dev_err_probe(&pdev->dev, -ENXIO, + "Parent regmap unavailable.\n"); - onkey->key_power = !of_property_read_bool(pdev->dev.of_node, - "dlg,disable-key-power"); + onkey->key_power = !device_property_read_bool(&pdev->dev, + "dlg,disable-key-power"); onkey->input = devm_input_allocate_device(&pdev->dev); - if (!onkey->input) { - dev_err(&pdev->dev, "Failed to allocated input device.\n"); + if (!onkey->input) return -ENOMEM; - } onkey->input->name = onkey->config->name; snprintf(onkey->phys, sizeof(onkey->phys), "%s/input0", onkey->config->name); onkey->input->phys = onkey->phys; - onkey->input->dev.parent = &pdev->dev; input_set_capability(onkey->input, EV_KEY, KEY_POWER); error = devm_delayed_work_autocancel(&pdev->dev, &onkey->work, da9063_poll_on); - if (error) { - dev_err(&pdev->dev, - "Failed to add cancel poll action: %d\n", - error); + if (error) return error; - } irq = platform_get_irq_byname(pdev, "ONKEY"); if (irq < 0) @@ -246,11 +227,9 @@ static int da9063_onkey_probe(struct platform_device *pdev) NULL, da9063_onkey_irq_handler, IRQF_TRIGGER_LOW | IRQF_ONESHOT, "ONKEY", onkey); - if (error) { - dev_err(&pdev->dev, - "Failed to request IRQ %d: %d\n", irq, error); - return error; - } + if (error) + return dev_err_probe(&pdev->dev, error, + "Failed to allocate onkey IRQ\n"); error = dev_pm_set_wake_irq(&pdev->dev, irq); if (error) @@ -261,15 +240,19 @@ static int da9063_onkey_probe(struct platform_device *pdev) device_init_wakeup(&pdev->dev, true); error = input_register_device(onkey->input); - if (error) { - dev_err(&pdev->dev, - "Failed to register input device: %d\n", error); + if (error) return error; - } return 0; } +static const struct of_device_id da9063_compatible_reg_id_table[] = { + { .compatible = "dlg,da9063-onkey", .data = &da9063_regs }, + { .compatible = "dlg,da9062-onkey", .data = &da9062_regs }, + { } +}; +MODULE_DEVICE_TABLE(of, da9063_compatible_reg_id_table); + static struct platform_driver da9063_onkey_driver = { .probe = 
da9063_onkey_probe, .driver = { diff --git a/drivers/input/misc/ims-pcu.c b/drivers/input/misc/ims-pcu.c index b2f1292e27ef7d5e9d6c0160c05479bd8f144e1d..6e8cc28debd979eb9b0b276811c615d9a59be850 100644 --- a/drivers/input/misc/ims-pcu.c +++ b/drivers/input/misc/ims-pcu.c @@ -1050,7 +1050,7 @@ static ssize_t ims_pcu_attribute_show(struct device *dev, container_of(dattr, struct ims_pcu_attribute, dattr); char *field = (char *)pcu + attr->field_offset; - return scnprintf(buf, PAGE_SIZE, "%.*s\n", attr->field_length, field); + return sysfs_emit(buf, "%.*s\n", attr->field_length, field); } static ssize_t ims_pcu_attribute_store(struct device *dev, @@ -1206,7 +1206,7 @@ ims_pcu_update_firmware_status_show(struct device *dev, struct usb_interface *intf = to_usb_interface(dev); struct ims_pcu *pcu = usb_get_intfdata(intf); - return scnprintf(buf, PAGE_SIZE, "%d\n", pcu->update_firmware_status); + return sysfs_emit(buf, "%d\n", pcu->update_firmware_status); } static DEVICE_ATTR(update_firmware_status, S_IRUGO, @@ -1309,7 +1309,7 @@ static ssize_t ims_pcu_ofn_reg_data_show(struct device *dev, if (error) return error; - return scnprintf(buf, PAGE_SIZE, "%x\n", data); + return sysfs_emit(buf, "%x\n", data); } static ssize_t ims_pcu_ofn_reg_data_store(struct device *dev, @@ -1344,7 +1344,7 @@ static ssize_t ims_pcu_ofn_reg_addr_show(struct device *dev, int error; mutex_lock(&pcu->cmd_mutex); - error = scnprintf(buf, PAGE_SIZE, "%x\n", pcu->ofn_reg_addr); + error = sysfs_emit(buf, "%x\n", pcu->ofn_reg_addr); mutex_unlock(&pcu->cmd_mutex); return error; @@ -1397,7 +1397,7 @@ static ssize_t ims_pcu_ofn_bit_show(struct device *dev, if (error) return error; - return scnprintf(buf, PAGE_SIZE, "%d\n", !!(data & (1 << attr->nr))); + return sysfs_emit(buf, "%d\n", !!(data & (1 << attr->nr))); } static ssize_t ims_pcu_ofn_bit_store(struct device *dev, diff --git a/drivers/input/misc/iqs269a.c b/drivers/input/misc/iqs269a.c index 
3c636c75e8a1f1520ab2c5c3bec4004e023080c8..cd14ff9f57cf239819d76cf3bae0da8c13524655 100644 --- a/drivers/input/misc/iqs269a.c +++ b/drivers/input/misc/iqs269a.c @@ -9,6 +9,7 @@ * axial sliders presented by the device. */ +#include #include #include #include @@ -26,6 +27,8 @@ #define IQS269_VER_INFO 0x00 #define IQS269_VER_INFO_PROD_NUM 0x4F +#define IQS269_VER_INFO_FW_NUM_2 0x03 +#define IQS269_VER_INFO_FW_NUM_3 0x10 #define IQS269_SYS_FLAGS 0x02 #define IQS269_SYS_FLAGS_SHOW_RESET BIT(15) @@ -53,6 +56,7 @@ #define IQS269_SYS_SETTINGS_ULP_UPDATE_MASK GENMASK(10, 8) #define IQS269_SYS_SETTINGS_ULP_UPDATE_SHIFT 8 #define IQS269_SYS_SETTINGS_ULP_UPDATE_MAX 7 +#define IQS269_SYS_SETTINGS_SLIDER_SWIPE BIT(7) #define IQS269_SYS_SETTINGS_RESEED_OFFSET BIT(6) #define IQS269_SYS_SETTINGS_EVENT_MODE BIT(5) #define IQS269_SYS_SETTINGS_EVENT_MODE_LP BIT(4) @@ -69,6 +73,7 @@ #define IQS269_FILT_STR_MAX 3 #define IQS269_EVENT_MASK_SYS BIT(6) +#define IQS269_EVENT_MASK_GESTURE BIT(3) #define IQS269_EVENT_MASK_DEEP BIT(2) #define IQS269_EVENT_MASK_TOUCH BIT(1) #define IQS269_EVENT_MASK_PROX BIT(0) @@ -97,6 +102,15 @@ #define IQS269_MISC_B_TRACKING_UI_ENABLE BIT(4) #define IQS269_MISC_B_FILT_STR_SLIDER GENMASK(1, 0) +#define IQS269_TOUCH_HOLD_SLIDER_SEL 0x89 +#define IQS269_TOUCH_HOLD_DEFAULT 0x14 +#define IQS269_TOUCH_HOLD_MS_MIN 256 +#define IQS269_TOUCH_HOLD_MS_MAX 65280 + +#define IQS269_TIMEOUT_TAP_MS_MAX 4080 +#define IQS269_TIMEOUT_SWIPE_MS_MAX 4080 +#define IQS269_THRESH_SWIPE_MAX 255 + #define IQS269_CHx_ENG_A_MEAS_CAP_SIZE BIT(15) #define IQS269_CHx_ENG_A_RX_GND_INACTIVE BIT(13) #define IQS269_CHx_ENG_A_LOCAL_CAP_SIZE BIT(12) @@ -142,6 +156,10 @@ #define IQS269_MAX_REG 0xFF +#define IQS269_OTP_OPTION_DEFAULT 0x00 +#define IQS269_OTP_OPTION_TWS 0xD0 +#define IQS269_OTP_OPTION_HOLD BIT(7) + #define IQS269_NUM_CH 8 #define IQS269_NUM_SL 2 @@ -175,6 +193,20 @@ enum iqs269_event_id { IQS269_EVENT_DEEP_UP, }; +enum iqs269_slider_id { + IQS269_SLIDER_NONE, + IQS269_SLIDER_KEY, + 
IQS269_SLIDER_RAW, +}; + +enum iqs269_gesture_id { + IQS269_GESTURE_TAP, + IQS269_GESTURE_HOLD, + IQS269_GESTURE_FLICK_POS, + IQS269_GESTURE_FLICK_NEG, + IQS269_NUM_GESTURES, +}; + struct iqs269_switch_desc { unsigned int code; bool enabled; @@ -234,7 +266,7 @@ struct iqs269_ver_info { u8 prod_num; u8 sw_num; u8 hw_num; - u8 padding; + u8 fw_num; } __packed; struct iqs269_ch_reg { @@ -285,16 +317,42 @@ struct iqs269_private { struct regmap *regmap; struct mutex lock; struct iqs269_switch_desc switches[ARRAY_SIZE(iqs269_events)]; + struct iqs269_ver_info ver_info; struct iqs269_sys_reg sys_reg; struct completion ati_done; struct input_dev *keypad; struct input_dev *slider[IQS269_NUM_SL]; unsigned int keycode[ARRAY_SIZE(iqs269_events) * IQS269_NUM_CH]; + unsigned int sl_code[IQS269_NUM_SL][IQS269_NUM_GESTURES]; + unsigned int otp_option; unsigned int ch_num; bool hall_enable; bool ati_current; }; +static enum iqs269_slider_id iqs269_slider_type(struct iqs269_private *iqs269, + int slider_num) +{ + int i; + + /* + * Slider 1 is unavailable if the touch-and-hold option is enabled via + * OTP. In that case, the channel selection register is repurposed for + * the touch-and-hold timer ceiling. 
+ */ + if (slider_num && (iqs269->otp_option & IQS269_OTP_OPTION_HOLD)) + return IQS269_SLIDER_NONE; + + if (!iqs269->sys_reg.slider_select[slider_num]) + return IQS269_SLIDER_NONE; + + for (i = 0; i < IQS269_NUM_GESTURES; i++) + if (iqs269->sl_code[slider_num][i] != KEY_RESERVED) + return IQS269_SLIDER_KEY; + + return IQS269_SLIDER_RAW; +} + static int iqs269_ati_mode_set(struct iqs269_private *iqs269, unsigned int ch_num, unsigned int mode) { @@ -525,7 +583,8 @@ static int iqs269_parse_chan(struct iqs269_private *iqs269, if (fwnode_property_present(ch_node, "azoteq,slider0-select")) iqs269->sys_reg.slider_select[0] |= BIT(reg); - if (fwnode_property_present(ch_node, "azoteq,slider1-select")) + if (fwnode_property_present(ch_node, "azoteq,slider1-select") && + !(iqs269->otp_option & IQS269_OTP_OPTION_HOLD)) iqs269->sys_reg.slider_select[1] |= BIT(reg); ch_reg = &iqs269->sys_reg.ch_reg[reg]; @@ -950,7 +1009,43 @@ static int iqs269_parse_prop(struct iqs269_private *iqs269) sys_reg->blocking = 0; sys_reg->slider_select[0] = 0; - sys_reg->slider_select[1] = 0; + + /* + * If configured via OTP to do so, the device asserts a pulse on the + * GPIO4 pin for approximately 60 ms once a selected channel is held + * in a state of touch for a configurable length of time. + * + * In that case, the register used for slider 1 channel selection is + * repurposed for the touch-and-hold timer ceiling. 
+ */ + if (iqs269->otp_option & IQS269_OTP_OPTION_HOLD) { + if (!device_property_read_u32(&client->dev, + "azoteq,touch-hold-ms", &val)) { + if (val < IQS269_TOUCH_HOLD_MS_MIN || + val > IQS269_TOUCH_HOLD_MS_MAX) { + dev_err(&client->dev, + "Invalid touch-and-hold ceiling: %u\n", + val); + return -EINVAL; + } + + sys_reg->slider_select[1] = val / 256; + } else if (iqs269->ver_info.fw_num < IQS269_VER_INFO_FW_NUM_3) { + /* + * The default touch-and-hold timer ceiling initially + * read from early revisions of silicon is invalid if + * the device experienced a soft reset between power- + * on and the read operation. + * + * To protect against this case, explicitly cache the + * default value so that it is restored each time the + * device is re-initialized. + */ + sys_reg->slider_select[1] = IQS269_TOUCH_HOLD_DEFAULT; + } + } else { + sys_reg->slider_select[1] = 0; + } sys_reg->event_mask = ~((u8)IQS269_EVENT_MASK_SYS); @@ -1004,6 +1099,76 @@ static int iqs269_parse_prop(struct iqs269_private *iqs269) general |= (val << IQS269_SYS_SETTINGS_ULP_UPDATE_SHIFT); } + if (device_property_present(&client->dev, "linux,keycodes")) { + int scale = 1; + int count = device_property_count_u32(&client->dev, + "linux,keycodes"); + if (count > IQS269_NUM_GESTURES * IQS269_NUM_SL) { + dev_err(&client->dev, "Too many keycodes present\n"); + return -EINVAL; + } else if (count < 0) { + dev_err(&client->dev, "Failed to count keycodes: %d\n", + count); + return count; + } + + error = device_property_read_u32_array(&client->dev, + "linux,keycodes", + *iqs269->sl_code, count); + if (error) { + dev_err(&client->dev, "Failed to read keycodes: %d\n", + error); + return error; + } + + if (device_property_present(&client->dev, + "azoteq,gesture-swipe")) + general |= IQS269_SYS_SETTINGS_SLIDER_SWIPE; + + /* + * Early revisions of silicon use a more granular step size for + * tap and swipe gesture timeouts; scale them appropriately. 
+ */ + if (iqs269->ver_info.fw_num < IQS269_VER_INFO_FW_NUM_3) + scale = 4; + + if (!device_property_read_u32(&client->dev, + "azoteq,timeout-tap-ms", &val)) { + if (val > IQS269_TIMEOUT_TAP_MS_MAX / scale) { + dev_err(&client->dev, "Invalid timeout: %u\n", + val); + return -EINVAL; + } + + sys_reg->timeout_tap = val / (16 / scale); + } + + if (!device_property_read_u32(&client->dev, + "azoteq,timeout-swipe-ms", + &val)) { + if (val > IQS269_TIMEOUT_SWIPE_MS_MAX / scale) { + dev_err(&client->dev, "Invalid timeout: %u\n", + val); + return -EINVAL; + } + + sys_reg->timeout_swipe = val / (16 / scale); + } + + if (!device_property_read_u32(&client->dev, + "azoteq,thresh-swipe", &val)) { + if (val > IQS269_THRESH_SWIPE_MAX) { + dev_err(&client->dev, "Invalid threshold: %u\n", + val); + return -EINVAL; + } + + sys_reg->thresh_swipe = val; + } + + sys_reg->event_mask &= ~IQS269_EVENT_MASK_GESTURE; + } + general &= ~IQS269_SYS_SETTINGS_RESEED_OFFSET; if (device_property_present(&client->dev, "azoteq,reseed-offset")) general |= IQS269_SYS_SETTINGS_RESEED_OFFSET; @@ -1012,10 +1177,11 @@ static int iqs269_parse_prop(struct iqs269_private *iqs269) /* * As per the datasheet, enable streaming during normal-power mode if - * either slider is in use. In that case, the device returns to event - * mode during low-power mode. + * raw coordinates will be read from either slider. In that case, the + * device returns to event mode during low-power mode. 
*/ - if (sys_reg->slider_select[0] || sys_reg->slider_select[1]) + if (iqs269_slider_type(iqs269, 0) == IQS269_SLIDER_RAW || + iqs269_slider_type(iqs269, 1) == IQS269_SLIDER_RAW) general |= IQS269_SYS_SETTINGS_EVENT_MODE_LP; general |= IQS269_SYS_SETTINGS_REDO_ATI; @@ -1026,12 +1192,30 @@ static int iqs269_parse_prop(struct iqs269_private *iqs269) return 0; } +static const struct reg_sequence iqs269_tws_init[] = { + { IQS269_TOUCH_HOLD_SLIDER_SEL, IQS269_TOUCH_HOLD_DEFAULT }, + { 0xF0, 0x580F }, + { 0xF0, 0x59EF }, +}; + static int iqs269_dev_init(struct iqs269_private *iqs269) { int error; mutex_lock(&iqs269->lock); + /* + * Early revisions of silicon require the following workaround in order + * to restore any OTP-enabled functionality after a soft reset. + */ + if (iqs269->otp_option == IQS269_OTP_OPTION_TWS && + iqs269->ver_info.fw_num < IQS269_VER_INFO_FW_NUM_3) { + error = regmap_multi_reg_write(iqs269->regmap, iqs269_tws_init, + ARRAY_SIZE(iqs269_tws_init)); + if (error) + goto err_mutex; + } + error = regmap_update_bits(iqs269->regmap, IQS269_HALL_UI, IQS269_HALL_UI_ENABLE, iqs269->hall_enable ? ~0 : 0); @@ -1106,19 +1290,37 @@ static int iqs269_input_init(struct iqs269_private *iqs269) } for (i = 0; i < IQS269_NUM_SL; i++) { - if (!iqs269->sys_reg.slider_select[i]) + if (iqs269_slider_type(iqs269, i) == IQS269_SLIDER_NONE) continue; iqs269->slider[i] = devm_input_allocate_device(&client->dev); if (!iqs269->slider[i]) return -ENOMEM; + iqs269->slider[i]->keycodemax = ARRAY_SIZE(iqs269->sl_code[i]); + iqs269->slider[i]->keycode = iqs269->sl_code[i]; + iqs269->slider[i]->keycodesize = sizeof(**iqs269->sl_code); + iqs269->slider[i]->name = i ? 
"iqs269a_slider_1" : "iqs269a_slider_0"; iqs269->slider[i]->id.bustype = BUS_I2C; - input_set_capability(iqs269->slider[i], EV_KEY, BTN_TOUCH); - input_set_abs_params(iqs269->slider[i], ABS_X, 0, 255, 0, 0); + for (j = 0; j < IQS269_NUM_GESTURES; j++) + if (iqs269->sl_code[i][j] != KEY_RESERVED) + input_set_capability(iqs269->slider[i], EV_KEY, + iqs269->sl_code[i][j]); + + /* + * Present the slider as a narrow trackpad if one or more chan- + * nels have been selected to participate, but no gestures have + * been mapped to a keycode. + */ + if (iqs269_slider_type(iqs269, i) == IQS269_SLIDER_RAW) { + input_set_capability(iqs269->slider[i], + EV_KEY, BTN_TOUCH); + input_set_abs_params(iqs269->slider[i], + ABS_X, 0, 255, 0, 0); + } error = input_register_device(iqs269->slider[i]); if (error) { @@ -1167,28 +1369,62 @@ static int iqs269_report(struct iqs269_private *iqs269) if (be16_to_cpu(flags.system) & IQS269_SYS_FLAGS_IN_ATI) return 0; - error = regmap_raw_read(iqs269->regmap, IQS269_SLIDER_X, slider_x, - sizeof(slider_x)); - if (error) { - dev_err(&client->dev, "Failed to read slider position: %d\n", - error); - return error; + if (iqs269_slider_type(iqs269, 0) == IQS269_SLIDER_RAW || + iqs269_slider_type(iqs269, 1) == IQS269_SLIDER_RAW) { + error = regmap_raw_read(iqs269->regmap, IQS269_SLIDER_X, + slider_x, sizeof(slider_x)); + if (error) { + dev_err(&client->dev, + "Failed to read slider position: %d\n", error); + return error; + } } for (i = 0; i < IQS269_NUM_SL; i++) { - if (!iqs269->sys_reg.slider_select[i]) + flags.gesture >>= (i * IQS269_NUM_GESTURES); + + switch (iqs269_slider_type(iqs269, i)) { + case IQS269_SLIDER_NONE: continue; - /* - * Report BTN_TOUCH if any channel that participates in the - * slider is in a state of touch. 
- */ - if (flags.states[IQS269_ST_OFFS_TOUCH] & - iqs269->sys_reg.slider_select[i]) { - input_report_key(iqs269->slider[i], BTN_TOUCH, 1); - input_report_abs(iqs269->slider[i], ABS_X, slider_x[i]); - } else { - input_report_key(iqs269->slider[i], BTN_TOUCH, 0); + case IQS269_SLIDER_KEY: + for (j = 0; j < IQS269_NUM_GESTURES; j++) + input_report_key(iqs269->slider[i], + iqs269->sl_code[i][j], + flags.gesture & BIT(j)); + + if (!(flags.gesture & (BIT(IQS269_GESTURE_FLICK_NEG) | + BIT(IQS269_GESTURE_FLICK_POS) | + BIT(IQS269_GESTURE_TAP)))) + break; + + input_sync(iqs269->slider[i]); + + /* + * Momentary gestures are followed by a complementary + * release cycle so as to emulate a full keystroke. + */ + for (j = 0; j < IQS269_NUM_GESTURES; j++) + if (j != IQS269_GESTURE_HOLD) + input_report_key(iqs269->slider[i], + iqs269->sl_code[i][j], + 0); + break; + + case IQS269_SLIDER_RAW: + /* + * The slider is considered to be in a state of touch + * if any selected channels are in a state of touch. 
+ */ + state = flags.states[IQS269_ST_OFFS_TOUCH]; + state &= iqs269->sys_reg.slider_select[i]; + + input_report_key(iqs269->slider[i], BTN_TOUCH, state); + + if (state) + input_report_abs(iqs269->slider[i], + ABS_X, slider_x[i]); + break; } input_sync(iqs269->slider[i]); @@ -1286,7 +1522,7 @@ static ssize_t counts_show(struct device *dev, if (error) return error; - return scnprintf(buf, PAGE_SIZE, "%u\n", le16_to_cpu(counts)); + return sysfs_emit(buf, "%u\n", le16_to_cpu(counts)); } static ssize_t hall_bin_show(struct device *dev, @@ -1324,7 +1560,7 @@ static ssize_t hall_bin_show(struct device *dev, return -EINVAL; } - return scnprintf(buf, PAGE_SIZE, "%u\n", val); + return sysfs_emit(buf, "%u\n", val); } static ssize_t hall_enable_show(struct device *dev, @@ -1332,7 +1568,7 @@ static ssize_t hall_enable_show(struct device *dev, { struct iqs269_private *iqs269 = dev_get_drvdata(dev); - return scnprintf(buf, PAGE_SIZE, "%u\n", iqs269->hall_enable); + return sysfs_emit(buf, "%u\n", iqs269->hall_enable); } static ssize_t hall_enable_store(struct device *dev, @@ -1362,7 +1598,7 @@ static ssize_t ch_number_show(struct device *dev, { struct iqs269_private *iqs269 = dev_get_drvdata(dev); - return scnprintf(buf, PAGE_SIZE, "%u\n", iqs269->ch_num); + return sysfs_emit(buf, "%u\n", iqs269->ch_num); } static ssize_t ch_number_store(struct device *dev, @@ -1391,8 +1627,7 @@ static ssize_t rx_enable_show(struct device *dev, struct iqs269_private *iqs269 = dev_get_drvdata(dev); struct iqs269_ch_reg *ch_reg = iqs269->sys_reg.ch_reg; - return scnprintf(buf, PAGE_SIZE, "%u\n", - ch_reg[iqs269->ch_num].rx_enable); + return sysfs_emit(buf, "%u\n", ch_reg[iqs269->ch_num].rx_enable); } static ssize_t rx_enable_store(struct device *dev, @@ -1432,7 +1667,7 @@ static ssize_t ati_mode_show(struct device *dev, if (error) return error; - return scnprintf(buf, PAGE_SIZE, "%u\n", val); + return sysfs_emit(buf, "%u\n", val); } static ssize_t ati_mode_store(struct device *dev, @@ -1465,7 
+1700,7 @@ static ssize_t ati_base_show(struct device *dev, if (error) return error; - return scnprintf(buf, PAGE_SIZE, "%u\n", val); + return sysfs_emit(buf, "%u\n", val); } static ssize_t ati_base_store(struct device *dev, @@ -1498,7 +1733,7 @@ static ssize_t ati_target_show(struct device *dev, if (error) return error; - return scnprintf(buf, PAGE_SIZE, "%u\n", val); + return sysfs_emit(buf, "%u\n", val); } static ssize_t ati_target_store(struct device *dev, @@ -1525,9 +1760,9 @@ static ssize_t ati_trigger_show(struct device *dev, { struct iqs269_private *iqs269 = dev_get_drvdata(dev); - return scnprintf(buf, PAGE_SIZE, "%u\n", - iqs269->ati_current && - completion_done(&iqs269->ati_done)); + return sysfs_emit(buf, "%u\n", + iqs269->ati_current && + completion_done(&iqs269->ati_done)); } static ssize_t ati_trigger_store(struct device *dev, @@ -1596,7 +1831,6 @@ static const struct regmap_config iqs269_regmap_config = { static int iqs269_probe(struct i2c_client *client) { - struct iqs269_ver_info ver_info; struct iqs269_private *iqs269; int error; @@ -1618,14 +1852,16 @@ static int iqs269_probe(struct i2c_client *client) mutex_init(&iqs269->lock); init_completion(&iqs269->ati_done); - error = regmap_raw_read(iqs269->regmap, IQS269_VER_INFO, &ver_info, - sizeof(ver_info)); + iqs269->otp_option = (uintptr_t)device_get_match_data(&client->dev); + + error = regmap_raw_read(iqs269->regmap, IQS269_VER_INFO, + &iqs269->ver_info, sizeof(iqs269->ver_info)); if (error) return error; - if (ver_info.prod_num != IQS269_VER_INFO_PROD_NUM) { + if (iqs269->ver_info.prod_num != IQS269_VER_INFO_PROD_NUM) { dev_err(&client->dev, "Unrecognized product number: 0x%02X\n", - ver_info.prod_num); + iqs269->ver_info.prod_num); return -EINVAL; } @@ -1728,7 +1964,18 @@ static int iqs269_resume(struct device *dev) static DEFINE_SIMPLE_DEV_PM_OPS(iqs269_pm, iqs269_suspend, iqs269_resume); static const struct of_device_id iqs269_of_match[] = { - { .compatible = "azoteq,iqs269a" }, + { + 
.compatible = "azoteq,iqs269a", + .data = (void *)IQS269_OTP_OPTION_DEFAULT, + }, + { + .compatible = "azoteq,iqs269a-00", + .data = (void *)IQS269_OTP_OPTION_DEFAULT, + }, + { + .compatible = "azoteq,iqs269a-d0", + .data = (void *)IQS269_OTP_OPTION_TWS, + }, { } }; MODULE_DEVICE_TABLE(of, iqs269_of_match); diff --git a/drivers/input/misc/max77693-haptic.c b/drivers/input/misc/max77693-haptic.c index 80f4416ffe2fe98ecc8032527cda68e1b57b019e..0e646f1b257b8fefd21057c31806d82150b4f4db 100644 --- a/drivers/input/misc/max77693-haptic.c +++ b/drivers/input/misc/max77693-haptic.c @@ -307,7 +307,7 @@ static int max77693_haptic_probe(struct platform_device *pdev) haptic->suspend_state = false; /* Variant-specific init */ - haptic->dev_type = platform_get_device_id(pdev)->driver_data; + haptic->dev_type = max77693->type; switch (haptic->dev_type) { case TYPE_MAX77693: haptic->regmap_haptic = max77693->regmap_haptic; @@ -406,16 +406,24 @@ static DEFINE_SIMPLE_DEV_PM_OPS(max77693_haptic_pm_ops, max77693_haptic_resume); static const struct platform_device_id max77693_haptic_id[] = { - { "max77693-haptic", TYPE_MAX77693 }, - { "max77843-haptic", TYPE_MAX77843 }, + { "max77693-haptic", }, + { "max77843-haptic", }, {}, }; MODULE_DEVICE_TABLE(platform, max77693_haptic_id); +static const struct of_device_id of_max77693_haptic_dt_match[] = { + { .compatible = "maxim,max77693-haptic", }, + { .compatible = "maxim,max77843-haptic", }, + { /* sentinel */ }, +}; +MODULE_DEVICE_TABLE(of, of_max77693_haptic_dt_match); + static struct platform_driver max77693_haptic_driver = { .driver = { .name = "max77693-haptic", .pm = pm_sleep_ptr(&max77693_haptic_pm_ops), + .of_match_table = of_max77693_haptic_dt_match, }, .probe = max77693_haptic_probe, .id_table = max77693_haptic_id, diff --git a/drivers/input/mouse/cyapa.c b/drivers/input/mouse/cyapa.c index a84098448f5b94d607dc5b86ceb5f04618783179..5979deabe23d192f47debae819d1ddc488d8edc6 100644 --- a/drivers/input/mouse/cyapa.c +++ 
b/drivers/input/mouse/cyapa.c @@ -756,16 +756,16 @@ static ssize_t cyapa_show_suspend_scanrate(struct device *dev, switch (pwr_cmd) { case PWR_MODE_BTN_ONLY: - len = scnprintf(buf, PAGE_SIZE, "%s\n", BTN_ONLY_MODE_NAME); + len = sysfs_emit(buf, "%s\n", BTN_ONLY_MODE_NAME); break; case PWR_MODE_OFF: - len = scnprintf(buf, PAGE_SIZE, "%s\n", OFF_MODE_NAME); + len = sysfs_emit(buf, "%s\n", OFF_MODE_NAME); break; default: - len = scnprintf(buf, PAGE_SIZE, "%u\n", - cyapa->gen == CYAPA_GEN3 ? + len = sysfs_emit(buf, "%u\n", + cyapa->gen == CYAPA_GEN3 ? cyapa_pwr_cmd_to_sleep_time(pwr_cmd) : sleep_time); break; @@ -877,8 +877,8 @@ static ssize_t cyapa_show_rt_suspend_scanrate(struct device *dev, mutex_unlock(&cyapa->state_sync_lock); - return scnprintf(buf, PAGE_SIZE, "%u\n", - cyapa->gen == CYAPA_GEN3 ? + return sysfs_emit(buf, "%u\n", + cyapa->gen == CYAPA_GEN3 ? cyapa_pwr_cmd_to_sleep_time(pwr_cmd) : sleep_time); } @@ -988,8 +988,8 @@ static ssize_t cyapa_show_fm_ver(struct device *dev, error = mutex_lock_interruptible(&cyapa->state_sync_lock); if (error) return error; - error = scnprintf(buf, PAGE_SIZE, "%d.%d\n", cyapa->fw_maj_ver, - cyapa->fw_min_ver); + error = sysfs_emit(buf, "%d.%d\n", + cyapa->fw_maj_ver, cyapa->fw_min_ver); mutex_unlock(&cyapa->state_sync_lock); return error; } @@ -1004,7 +1004,7 @@ static ssize_t cyapa_show_product_id(struct device *dev, error = mutex_lock_interruptible(&cyapa->state_sync_lock); if (error) return error; - size = scnprintf(buf, PAGE_SIZE, "%s\n", cyapa->product_id); + size = sysfs_emit(buf, "%s\n", cyapa->product_id); mutex_unlock(&cyapa->state_sync_lock); return size; } @@ -1209,8 +1209,8 @@ static ssize_t cyapa_show_mode(struct device *dev, if (error) return error; - size = scnprintf(buf, PAGE_SIZE, "gen%d %s\n", - cyapa->gen, cyapa_state_to_string(cyapa)); + size = sysfs_emit(buf, "gen%d %s\n", + cyapa->gen, cyapa_state_to_string(cyapa)); mutex_unlock(&cyapa->state_sync_lock); return size; diff --git 
a/drivers/input/mouse/cyapa_gen3.c b/drivers/input/mouse/cyapa_gen3.c index a97f4acb64526248178715e035b89dd9a93eef37..60c83bc71d84e662d15af2654c54ba2cb39e4147 100644 --- a/drivers/input/mouse/cyapa_gen3.c +++ b/drivers/input/mouse/cyapa_gen3.c @@ -860,7 +860,7 @@ static ssize_t cyapa_gen3_show_baseline(struct device *dev, dev_dbg(dev, "Baseline report successful. Max: %d Min: %d\n", max_baseline, min_baseline); - ret = scnprintf(buf, PAGE_SIZE, "%d %d\n", max_baseline, min_baseline); + ret = sysfs_emit(buf, "%d %d\n", max_baseline, min_baseline); out: return ret; diff --git a/drivers/input/mouse/cyapa_gen5.c b/drivers/input/mouse/cyapa_gen5.c index abf42f77b4c593d1b3f8406832d2a67684780b8d..2e6bcb07257ed7374ac69e288d21fc4133cddb85 100644 --- a/drivers/input/mouse/cyapa_gen5.c +++ b/drivers/input/mouse/cyapa_gen5.c @@ -2418,12 +2418,12 @@ resume_scanning: return resume_error ? resume_error : error; /* 12. Output data strings */ - size = scnprintf(buf, PAGE_SIZE, "%d %d %d %d %d %d %d %d %d %d %d ", + size = sysfs_emit(buf, "%d %d %d %d %d %d %d %d %d %d %d ", gidac_mutual_min, gidac_mutual_max, gidac_mutual_ave, lidac_mutual_min, lidac_mutual_max, lidac_mutual_ave, gidac_self_rx, gidac_self_tx, lidac_self_min, lidac_self_max, lidac_self_ave); - size += scnprintf(buf + size, PAGE_SIZE - size, + size += sysfs_emit_at(buf, size, "%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d\n", raw_cap_mutual_min, raw_cap_mutual_max, raw_cap_mutual_ave, raw_cap_self_min, raw_cap_self_max, raw_cap_self_ave, diff --git a/drivers/input/mouse/cyapa_gen6.c b/drivers/input/mouse/cyapa_gen6.c index 0caaf3e64215d00c57196967f229d7892afe8acc..4ffe08fee10c8a465cb3e4042de08b335ffa6f0b 100644 --- a/drivers/input/mouse/cyapa_gen6.c +++ b/drivers/input/mouse/cyapa_gen6.c @@ -629,14 +629,14 @@ static ssize_t cyapa_gen6_show_baseline(struct device *dev, if (error) goto resume_scanning; - size = scnprintf(buf, PAGE_SIZE, "%d %d %d %d %d %d ", - data[0], /* RX Attenuator Mutual */ - data[1], /* 
IDAC Mutual */ - data[2], /* RX Attenuator Self RX */ - data[3], /* IDAC Self RX */ - data[4], /* RX Attenuator Self TX */ - data[5] /* IDAC Self TX */ - ); + size = sysfs_emit(buf, "%d %d %d %d %d %d ", + data[0], /* RX Attenuator Mutual */ + data[1], /* IDAC Mutual */ + data[2], /* RX Attenuator Self RX */ + data[3], /* IDAC Self RX */ + data[4], /* RX Attenuator Self TX */ + data[5] /* IDAC Self TX */ + ); /* 3. Read Attenuator Trim. */ data_len = sizeof(data); @@ -648,8 +648,8 @@ static ssize_t cyapa_gen6_show_baseline(struct device *dev, /* set attenuator trim values. */ for (i = 0; i < data_len; i++) - size += scnprintf(buf + size, PAGE_SIZE - size, "%d ", data[i]); - size += scnprintf(buf + size, PAGE_SIZE - size, "\n"); + size += sysfs_emit_at(buf, size, "%d ", data[i]); + size += sysfs_emit_at(buf, size, "\n"); resume_scanning: /* 4. Resume Scanning*/ diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index 148a601396f92cd5293405fe0355f57672154e4e..8a72c200ccb5d127d1dcd209b8eda29f4b619fef 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -572,7 +572,7 @@ static ssize_t elan_sysfs_read_fw_checksum(struct device *dev, struct i2c_client *client = to_i2c_client(dev); struct elan_tp_data *data = i2c_get_clientdata(client); - return sprintf(buf, "0x%04x\n", data->fw_checksum); + return sysfs_emit(buf, "0x%04x\n", data->fw_checksum); } static ssize_t elan_sysfs_read_product_id(struct device *dev, @@ -582,8 +582,8 @@ static ssize_t elan_sysfs_read_product_id(struct device *dev, struct i2c_client *client = to_i2c_client(dev); struct elan_tp_data *data = i2c_get_clientdata(client); - return sprintf(buf, ETP_PRODUCT_ID_FORMAT_STRING "\n", - data->product_id); + return sysfs_emit(buf, ETP_PRODUCT_ID_FORMAT_STRING "\n", + data->product_id); } static ssize_t elan_sysfs_read_fw_ver(struct device *dev, @@ -593,7 +593,7 @@ static ssize_t elan_sysfs_read_fw_ver(struct device *dev, struct 
i2c_client *client = to_i2c_client(dev); struct elan_tp_data *data = i2c_get_clientdata(client); - return sprintf(buf, "%d.0\n", data->fw_version); + return sysfs_emit(buf, "%d.0\n", data->fw_version); } static ssize_t elan_sysfs_read_sm_ver(struct device *dev, @@ -603,7 +603,7 @@ static ssize_t elan_sysfs_read_sm_ver(struct device *dev, struct i2c_client *client = to_i2c_client(dev); struct elan_tp_data *data = i2c_get_clientdata(client); - return sprintf(buf, "%d.0\n", data->sm_version); + return sysfs_emit(buf, "%d.0\n", data->sm_version); } static ssize_t elan_sysfs_read_iap_ver(struct device *dev, @@ -613,7 +613,7 @@ static ssize_t elan_sysfs_read_iap_ver(struct device *dev, struct i2c_client *client = to_i2c_client(dev); struct elan_tp_data *data = i2c_get_clientdata(client); - return sprintf(buf, "%d.0\n", data->iap_version); + return sysfs_emit(buf, "%d.0\n", data->iap_version); } static ssize_t elan_sysfs_update_fw(struct device *dev, @@ -754,7 +754,7 @@ static ssize_t elan_sysfs_read_mode(struct device *dev, if (error) return error; - return sprintf(buf, "%d\n", (int)mode); + return sysfs_emit(buf, "%d\n", (int)mode); } static DEVICE_ATTR(product_id, S_IRUGO, elan_sysfs_read_product_id, NULL); @@ -858,7 +858,7 @@ static ssize_t min_show(struct device *dev, goto out; } - retval = snprintf(buf, PAGE_SIZE, "%d", data->min_baseline); + retval = sysfs_emit(buf, "%d", data->min_baseline); out: mutex_unlock(&data->sysfs_mutex); @@ -881,7 +881,7 @@ static ssize_t max_show(struct device *dev, goto out; } - retval = snprintf(buf, PAGE_SIZE, "%d", data->max_baseline); + retval = sysfs_emit(buf, "%d", data->max_baseline); out: mutex_unlock(&data->sysfs_mutex); diff --git a/drivers/input/mouse/navpoint.c b/drivers/input/mouse/navpoint.c index c00dc1275da23daebb21110749f3c3f75431d075..ba757783c258aaddef4d6b882282451df70e2528 100644 --- a/drivers/input/mouse/navpoint.c +++ b/drivers/input/mouse/navpoint.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include 
#include #include #include @@ -32,7 +32,7 @@ struct navpoint { struct ssp_device *ssp; struct input_dev *input; struct device *dev; - int gpio; + struct gpio_desc *gpiod; int index; u8 data[1 + HEADER_LENGTH(0xff)]; }; @@ -170,16 +170,14 @@ static void navpoint_up(struct navpoint *navpoint) dev_err(navpoint->dev, "timeout waiting for SSSR[CSS] to clear\n"); - if (gpio_is_valid(navpoint->gpio)) - gpio_set_value(navpoint->gpio, 1); + gpiod_set_value(navpoint->gpiod, 1); } static void navpoint_down(struct navpoint *navpoint) { struct ssp_device *ssp = navpoint->ssp; - if (gpio_is_valid(navpoint->gpio)) - gpio_set_value(navpoint->gpio, 0); + gpiod_set_value(navpoint->gpiod, 0); pxa_ssp_write_reg(ssp, SSCR0, 0); @@ -216,18 +214,9 @@ static int navpoint_probe(struct platform_device *pdev) return -EINVAL; } - if (gpio_is_valid(pdata->gpio)) { - error = gpio_request_one(pdata->gpio, GPIOF_OUT_INIT_LOW, - "SYNAPTICS_ON"); - if (error) - return error; - } - ssp = pxa_ssp_request(pdata->port, pdev->name); - if (!ssp) { - error = -ENODEV; - goto err_free_gpio; - } + if (!ssp) + return -ENODEV; /* HaRET does not disable devices before jumping into Linux */ if (pxa_ssp_read_reg(ssp, SSCR0) & SSCR0_SSE) { @@ -242,10 +231,18 @@ static int navpoint_probe(struct platform_device *pdev) goto err_free_mem; } + navpoint->gpiod = gpiod_get_optional(&pdev->dev, + NULL, GPIOD_OUT_LOW); + if (IS_ERR(navpoint->gpiod)) { + error = PTR_ERR(navpoint->gpiod); + dev_err(&pdev->dev, "error getting GPIO\n"); + goto err_free_mem; + } + gpiod_set_consumer_name(navpoint->gpiod, "SYNAPTICS_ON"); + navpoint->ssp = ssp; navpoint->input = input; navpoint->dev = &pdev->dev; - navpoint->gpio = pdata->gpio; input->name = pdev->name; input->dev.parent = &pdev->dev; @@ -288,17 +285,12 @@ err_free_mem: input_free_device(input); kfree(navpoint); pxa_ssp_free(ssp); -err_free_gpio: - if (gpio_is_valid(pdata->gpio)) - gpio_free(pdata->gpio); return error; } static void navpoint_remove(struct platform_device *pdev) 
{ - const struct navpoint_platform_data *pdata = - dev_get_platdata(&pdev->dev); struct navpoint *navpoint = platform_get_drvdata(pdev); struct ssp_device *ssp = navpoint->ssp; @@ -308,9 +300,6 @@ static void navpoint_remove(struct platform_device *pdev) kfree(navpoint); pxa_ssp_free(ssp); - - if (gpio_is_valid(pdata->gpio)) - gpio_free(pdata->gpio); } static int navpoint_suspend(struct device *dev) diff --git a/drivers/input/rmi4/rmi_f01.c b/drivers/input/rmi4/rmi_f01.c index d7603c50f864b329e1ef4dc9c75df885113e8cbb..cc1d4b424640ea3902cf726329b69c01aa23c4ba 100644 --- a/drivers/input/rmi4/rmi_f01.c +++ b/drivers/input/rmi4/rmi_f01.c @@ -267,8 +267,7 @@ static ssize_t rmi_driver_manufacturer_id_show(struct device *dev, struct rmi_driver_data *data = dev_get_drvdata(dev); struct f01_data *f01 = dev_get_drvdata(&data->f01_container->dev); - return scnprintf(buf, PAGE_SIZE, "%d\n", - f01->properties.manufacturer_id); + return sysfs_emit(buf, "%d\n", f01->properties.manufacturer_id); } static DEVICE_ATTR(manufacturer_id, 0444, @@ -280,7 +279,7 @@ static ssize_t rmi_driver_dom_show(struct device *dev, struct rmi_driver_data *data = dev_get_drvdata(dev); struct f01_data *f01 = dev_get_drvdata(&data->f01_container->dev); - return scnprintf(buf, PAGE_SIZE, "%s\n", f01->properties.dom); + return sysfs_emit(buf, "%s\n", f01->properties.dom); } static DEVICE_ATTR(date_of_manufacture, 0444, rmi_driver_dom_show, NULL); @@ -292,7 +291,7 @@ static ssize_t rmi_driver_product_id_show(struct device *dev, struct rmi_driver_data *data = dev_get_drvdata(dev); struct f01_data *f01 = dev_get_drvdata(&data->f01_container->dev); - return scnprintf(buf, PAGE_SIZE, "%s\n", f01->properties.product_id); + return sysfs_emit(buf, "%s\n", f01->properties.product_id); } static DEVICE_ATTR(product_id, 0444, rmi_driver_product_id_show, NULL); @@ -304,7 +303,7 @@ static ssize_t rmi_driver_firmware_id_show(struct device *dev, struct rmi_driver_data *data = dev_get_drvdata(dev); struct f01_data *f01 = 
dev_get_drvdata(&data->f01_container->dev); - return scnprintf(buf, PAGE_SIZE, "%d\n", f01->properties.firmware_id); + return sysfs_emit(buf, "%d\n", f01->properties.firmware_id); } static DEVICE_ATTR(firmware_id, 0444, rmi_driver_firmware_id_show, NULL); @@ -318,8 +317,8 @@ static ssize_t rmi_driver_package_id_show(struct device *dev, u32 package_id = f01->properties.package_id; - return scnprintf(buf, PAGE_SIZE, "%04x.%04x\n", - package_id & 0xffff, (package_id >> 16) & 0xffff); + return sysfs_emit(buf, "%04x.%04x\n", + package_id & 0xffff, (package_id >> 16) & 0xffff); } static DEVICE_ATTR(package_id, 0444, rmi_driver_package_id_show, NULL); diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c index a2c4b3b87f9356602b15e4d5eaaed2cc47f453a6..542a31448c8f13249cddffa1b2c8f5f9fb9f0c02 100644 --- a/drivers/input/touchscreen/atmel_mxt_ts.c +++ b/drivers/input/touchscreen/atmel_mxt_ts.c @@ -2818,8 +2818,8 @@ static ssize_t mxt_fw_version_show(struct device *dev, { struct mxt_data *data = dev_get_drvdata(dev); struct mxt_info *info = data->info; - return scnprintf(buf, PAGE_SIZE, "%u.%u.%02X\n", - info->version >> 4, info->version & 0xf, info->build); + return sysfs_emit(buf, "%u.%u.%02X\n", + info->version >> 4, info->version & 0xf, info->build); } /* Hardware Version is returned as FamilyID.VariantID */ @@ -2828,8 +2828,7 @@ static ssize_t mxt_hw_version_show(struct device *dev, { struct mxt_data *data = dev_get_drvdata(dev); struct mxt_info *info = data->info; - return scnprintf(buf, PAGE_SIZE, "%u.%u\n", - info->family_id, info->variant_id); + return sysfs_emit(buf, "%u.%u\n", info->family_id, info->variant_id); } static ssize_t mxt_show_instance(char *buf, int count, @@ -2839,19 +2838,18 @@ static ssize_t mxt_show_instance(char *buf, int count, int i; if (mxt_obj_instances(object) > 1) - count += scnprintf(buf + count, PAGE_SIZE - count, - "Instance %u\n", instance); + count += sysfs_emit_at(buf, count, "Instance %u\n", 
instance); for (i = 0; i < mxt_obj_size(object); i++) - count += scnprintf(buf + count, PAGE_SIZE - count, - "\t[%2u]: %02x (%d)\n", i, val[i], val[i]); - count += scnprintf(buf + count, PAGE_SIZE - count, "\n"); + count += sysfs_emit_at(buf, count, "\t[%2u]: %02x (%d)\n", + i, val[i], val[i]); + count += sysfs_emit_at(buf, count, "\n"); return count; } static ssize_t mxt_object_show(struct device *dev, - struct device_attribute *attr, char *buf) + struct device_attribute *attr, char *buf) { struct mxt_data *data = dev_get_drvdata(dev); struct mxt_object *object; @@ -2872,8 +2870,7 @@ static ssize_t mxt_object_show(struct device *dev, if (!mxt_object_readable(object->type)) continue; - count += scnprintf(buf + count, PAGE_SIZE - count, - "T%u:\n", object->type); + count += sysfs_emit_at(buf, count, "T%u:\n", object->type); for (j = 0; j < mxt_obj_instances(object); j++) { u16 size = mxt_obj_size(object); diff --git a/drivers/input/touchscreen/edt-ft5x06.c b/drivers/input/touchscreen/edt-ft5x06.c index 3e102bcc4a1c7120428ca4c8a40d396797ae9980..2a1db1134476656e8148dd963d68f72de7d57d55 100644 --- a/drivers/input/touchscreen/edt-ft5x06.c +++ b/drivers/input/touchscreen/edt-ft5x06.c @@ -431,7 +431,7 @@ static ssize_t edt_ft5x06_setting_show(struct device *dev, *field = val; } - count = scnprintf(buf, PAGE_SIZE, "%d\n", val); + count = sysfs_emit(buf, "%d\n", val); out: mutex_unlock(&tsdata->mutex); return error ?: count; diff --git a/drivers/input/touchscreen/hideep.c b/drivers/input/touchscreen/hideep.c index 0f58258306bfc594e84625f51e56cf7cd963615d..eae90676f4e551b67b2d8565665e86ce94ee74d7 100644 --- a/drivers/input/touchscreen/hideep.c +++ b/drivers/input/touchscreen/hideep.c @@ -928,8 +928,7 @@ static ssize_t hideep_fw_version_show(struct device *dev, ssize_t len; mutex_lock(&ts->dev_mutex); - len = scnprintf(buf, PAGE_SIZE, "%04x\n", - be16_to_cpu(ts->dwz_info.release_ver)); + len = sysfs_emit(buf, "%04x\n", be16_to_cpu(ts->dwz_info.release_ver)); 
mutex_unlock(&ts->dev_mutex); return len; @@ -943,8 +942,7 @@ static ssize_t hideep_product_id_show(struct device *dev, ssize_t len; mutex_lock(&ts->dev_mutex); - len = scnprintf(buf, PAGE_SIZE, "%04x\n", - be16_to_cpu(ts->dwz_info.product_id)); + len = sysfs_emit(buf, "%04x\n", be16_to_cpu(ts->dwz_info.product_id)); mutex_unlock(&ts->dev_mutex); return len; diff --git a/drivers/input/touchscreen/hycon-hy46xx.c b/drivers/input/touchscreen/hycon-hy46xx.c index d0f257989fd6b728091ff3ee1add1d00777507de..2e01d87977c168ff618220aceaa02da0a132e59a 100644 --- a/drivers/input/touchscreen/hycon-hy46xx.c +++ b/drivers/input/touchscreen/hycon-hy46xx.c @@ -202,7 +202,7 @@ static ssize_t hycon_hy46xx_setting_show(struct device *dev, *field = val; } - count = scnprintf(buf, PAGE_SIZE, "%d\n", val); + count = sysfs_emit(buf, "%d\n", val); out: mutex_unlock(&tsdata->mutex); diff --git a/drivers/input/touchscreen/ilitek_ts_i2c.c b/drivers/input/touchscreen/ilitek_ts_i2c.c index 90c4934e750a3a3dda59e0688388f8d1f0dcf4b4..fc4e39b6651a4919bea885c5971a04c5dadeeb7f 100644 --- a/drivers/input/touchscreen/ilitek_ts_i2c.c +++ b/drivers/input/touchscreen/ilitek_ts_i2c.c @@ -512,12 +512,12 @@ static ssize_t firmware_version_show(struct device *dev, struct i2c_client *client = to_i2c_client(dev); struct ilitek_ts_data *ts = i2c_get_clientdata(client); - return scnprintf(buf, PAGE_SIZE, - "fw version: [%02X%02X.%02X%02X.%02X%02X.%02X%02X]\n", - ts->firmware_ver[0], ts->firmware_ver[1], - ts->firmware_ver[2], ts->firmware_ver[3], - ts->firmware_ver[4], ts->firmware_ver[5], - ts->firmware_ver[6], ts->firmware_ver[7]); + return sysfs_emit(buf, + "fw version: [%02X%02X.%02X%02X.%02X%02X.%02X%02X]\n", + ts->firmware_ver[0], ts->firmware_ver[1], + ts->firmware_ver[2], ts->firmware_ver[3], + ts->firmware_ver[4], ts->firmware_ver[5], + ts->firmware_ver[6], ts->firmware_ver[7]); } static DEVICE_ATTR_RO(firmware_version); @@ -527,8 +527,8 @@ static ssize_t product_id_show(struct device *dev, struct 
i2c_client *client = to_i2c_client(dev); struct ilitek_ts_data *ts = i2c_get_clientdata(client); - return scnprintf(buf, PAGE_SIZE, "product id: [%04X], module: [%s]\n", - ts->mcu_ver, ts->product_id); + return sysfs_emit(buf, "product id: [%04X], module: [%s]\n", + ts->mcu_ver, ts->product_id); } static DEVICE_ATTR_RO(product_id); diff --git a/drivers/input/touchscreen/iqs5xx.c b/drivers/input/touchscreen/iqs5xx.c index a3f4fb85bee58bf1e5f73861bd59637eed4e565f..4d226118f3cc2986efd2a8efce6484733c7dc206 100644 --- a/drivers/input/touchscreen/iqs5xx.c +++ b/drivers/input/touchscreen/iqs5xx.c @@ -943,12 +943,12 @@ static ssize_t fw_info_show(struct device *dev, if (!iqs5xx->dev_id_info.bl_status) return -ENODATA; - return scnprintf(buf, PAGE_SIZE, "%u.%u.%u.%u:%u.%u\n", - be16_to_cpu(iqs5xx->dev_id_info.prod_num), - be16_to_cpu(iqs5xx->dev_id_info.proj_num), - iqs5xx->dev_id_info.major_ver, - iqs5xx->dev_id_info.minor_ver, - iqs5xx->exp_file[0], iqs5xx->exp_file[1]); + return sysfs_emit(buf, "%u.%u.%u.%u:%u.%u\n", + be16_to_cpu(iqs5xx->dev_id_info.prod_num), + be16_to_cpu(iqs5xx->dev_id_info.proj_num), + iqs5xx->dev_id_info.major_ver, + iqs5xx->dev_id_info.minor_ver, + iqs5xx->exp_file[0], iqs5xx->exp_file[1]); } static DEVICE_ATTR_WO(fw_file); diff --git a/drivers/input/touchscreen/iqs7211.c b/drivers/input/touchscreen/iqs7211.c index dc084f873762005b3c129cdf154f1feaa5b6758d..f0a56cde899e48054d5aaf6ec3daef02104fa6c5 100644 --- a/drivers/input/touchscreen/iqs7211.c +++ b/drivers/input/touchscreen/iqs7211.c @@ -2401,12 +2401,12 @@ static ssize_t fw_info_show(struct device *dev, { struct iqs7211_private *iqs7211 = dev_get_drvdata(dev); - return scnprintf(buf, PAGE_SIZE, "%u.%u.%u.%u:%u.%u\n", - le16_to_cpu(iqs7211->ver_info.prod_num), - le32_to_cpu(iqs7211->ver_info.patch), - le16_to_cpu(iqs7211->ver_info.major), - le16_to_cpu(iqs7211->ver_info.minor), - iqs7211->exp_file[1], iqs7211->exp_file[0]); + return sysfs_emit(buf, "%u.%u.%u.%u:%u.%u\n", + 
le16_to_cpu(iqs7211->ver_info.prod_num), + le32_to_cpu(iqs7211->ver_info.patch), + le16_to_cpu(iqs7211->ver_info.major), + le16_to_cpu(iqs7211->ver_info.minor), + iqs7211->exp_file[1], iqs7211->exp_file[0]); } static DEVICE_ATTR_RO(fw_info); diff --git a/drivers/input/touchscreen/melfas_mip4.c b/drivers/input/touchscreen/melfas_mip4.c index aa325486f61825e2e30d36a432fffafd8ddb2cbb..78e1c63e530e06b4cf86801a3feef0a02785879f 100644 --- a/drivers/input/touchscreen/melfas_mip4.c +++ b/drivers/input/touchscreen/melfas_mip4.c @@ -1336,9 +1336,9 @@ static ssize_t mip4_sysfs_read_fw_version(struct device *dev, /* Take lock to prevent racing with firmware update */ mutex_lock(&ts->input->mutex); - count = snprintf(buf, PAGE_SIZE, "%04X %04X %04X %04X\n", - ts->fw_version.boot, ts->fw_version.core, - ts->fw_version.app, ts->fw_version.param); + count = sysfs_emit(buf, "%04X %04X %04X %04X\n", + ts->fw_version.boot, ts->fw_version.core, + ts->fw_version.app, ts->fw_version.param); mutex_unlock(&ts->input->mutex); @@ -1362,8 +1362,8 @@ static ssize_t mip4_sysfs_read_hw_version(struct device *dev, * product_name shows the name or version of the hardware * paired with current firmware in the chip. 
*/ - count = snprintf(buf, PAGE_SIZE, "%.*s\n", - (int)sizeof(ts->product_name), ts->product_name); + count = sysfs_emit(buf, "%.*s\n", + (int)sizeof(ts->product_name), ts->product_name); mutex_unlock(&ts->input->mutex); @@ -1382,7 +1382,7 @@ static ssize_t mip4_sysfs_read_product_id(struct device *dev, mutex_lock(&ts->input->mutex); - count = snprintf(buf, PAGE_SIZE, "%04X\n", ts->product_id); + count = sysfs_emit(buf, "%04X\n", ts->product_id); mutex_unlock(&ts->input->mutex); @@ -1401,8 +1401,8 @@ static ssize_t mip4_sysfs_read_ic_name(struct device *dev, mutex_lock(&ts->input->mutex); - count = snprintf(buf, PAGE_SIZE, "%.*s\n", - (int)sizeof(ts->ic_name), ts->ic_name); + count = sysfs_emit(buf, "%.*s\n", + (int)sizeof(ts->ic_name), ts->ic_name); mutex_unlock(&ts->input->mutex); diff --git a/drivers/input/touchscreen/usbtouchscreen.c b/drivers/input/touchscreen/usbtouchscreen.c index d6d04b9f04fc1cb272c67f3968556d1e8024cd08..60354ebc7242493ed7625db8e647101b28142748 100644 --- a/drivers/input/touchscreen/usbtouchscreen.c +++ b/drivers/input/touchscreen/usbtouchscreen.c @@ -456,8 +456,8 @@ static ssize_t mtouch_firmware_rev_show(struct device *dev, struct usbtouch_usb *usbtouch = usb_get_intfdata(intf); struct mtouch_priv *priv = usbtouch->priv; - return scnprintf(output, PAGE_SIZE, "%1x.%1x\n", - priv->fw_rev_major, priv->fw_rev_minor); + return sysfs_emit(output, "%1x.%1x\n", + priv->fw_rev_major, priv->fw_rev_minor); } static DEVICE_ATTR(firmware_rev, 0444, mtouch_firmware_rev_show, NULL); diff --git a/drivers/input/touchscreen/wdt87xx_i2c.c b/drivers/input/touchscreen/wdt87xx_i2c.c index 128341a6696bc01943c1b7eb7eb42ecf93417183..32c7be54434cf26444eef7b8a3a34277c40049bd 100644 --- a/drivers/input/touchscreen/wdt87xx_i2c.c +++ b/drivers/input/touchscreen/wdt87xx_i2c.c @@ -887,7 +887,7 @@ static ssize_t config_csum_show(struct device *dev, cfg_csum = wdt->param.xmls_id1; cfg_csum = (cfg_csum << 16) | wdt->param.xmls_id2; - return scnprintf(buf, PAGE_SIZE, 
"%x\n", cfg_csum); + return sysfs_emit(buf, "%x\n", cfg_csum); } static ssize_t fw_version_show(struct device *dev, @@ -896,7 +896,7 @@ static ssize_t fw_version_show(struct device *dev, struct i2c_client *client = to_i2c_client(dev); struct wdt87xx_data *wdt = i2c_get_clientdata(client); - return scnprintf(buf, PAGE_SIZE, "%x\n", wdt->param.fw_id); + return sysfs_emit(buf, "%x\n", wdt->param.fw_id); } static ssize_t plat_id_show(struct device *dev, @@ -905,7 +905,7 @@ static ssize_t plat_id_show(struct device *dev, struct i2c_client *client = to_i2c_client(dev); struct wdt87xx_data *wdt = i2c_get_clientdata(client); - return scnprintf(buf, PAGE_SIZE, "%x\n", wdt->param.plat_id); + return sysfs_emit(buf, "%x\n", wdt->param.plat_id); } static ssize_t update_config_store(struct device *dev, diff --git a/drivers/input/touchscreen/zforce_ts.c b/drivers/input/touchscreen/zforce_ts.c index 5be5112845e1edf93808bddd3a65fa93347f4997..5680075f0bb84474713705cb43db09b5f4b90e9a 100644 --- a/drivers/input/touchscreen/zforce_ts.c +++ b/drivers/input/touchscreen/zforce_ts.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -106,6 +107,7 @@ struct zforce_point { struct zforce_ts { struct i2c_client *client; struct input_dev *input; + struct touchscreen_properties prop; const struct zforce_ts_platdata *pdata; char phys[32]; @@ -266,7 +268,6 @@ static int zforce_setconfig(struct zforce_ts *ts, char b1) static int zforce_start(struct zforce_ts *ts) { struct i2c_client *client = ts->client; - const struct zforce_ts_platdata *pdata = ts->pdata; int ret; dev_dbg(&client->dev, "starting device\n"); @@ -277,7 +278,7 @@ static int zforce_start(struct zforce_ts *ts) return ret; } - ret = zforce_resolution(ts, pdata->x_max, pdata->y_max); + ret = zforce_resolution(ts, ts->prop.max_x, ts->prop.max_y); if (ret) { dev_err(&client->dev, "Unable to set resolution, %d\n", ret); goto error; @@ -337,7 +338,6 @@ static int zforce_stop(struct zforce_ts *ts) static int 
zforce_touch_event(struct zforce_ts *ts, u8 *payload) { struct i2c_client *client = ts->client; - const struct zforce_ts_platdata *pdata = ts->pdata; struct zforce_point point; int count, i, num = 0; @@ -355,8 +355,8 @@ static int zforce_touch_event(struct zforce_ts *ts, u8 *payload) point.coord_y = payload[9 * i + 4] << 8 | payload[9 * i + 3]; - if (point.coord_x > pdata->x_max || - point.coord_y > pdata->y_max) { + if (point.coord_x > ts->prop.max_x || + point.coord_y > ts->prop.max_y) { dev_warn(&client->dev, "coordinates (%d,%d) invalid\n", point.coord_x, point.coord_y); point.coord_x = point.coord_y = 0; @@ -390,10 +390,9 @@ static int zforce_touch_event(struct zforce_ts *ts, u8 *payload) point.state != STATE_UP); if (point.state != STATE_UP) { - input_report_abs(ts->input, ABS_MT_POSITION_X, - point.coord_x); - input_report_abs(ts->input, ABS_MT_POSITION_Y, - point.coord_y); + touchscreen_report_pos(ts->input, &ts->prop, + point.coord_x, point.coord_y, + true); input_report_abs(ts->input, ABS_MT_TOUCH_MAJOR, point.area_major); input_report_abs(ts->input, ABS_MT_TOUCH_MINOR, @@ -719,15 +718,8 @@ static struct zforce_ts_platdata *zforce_parse_dt(struct device *dev) return ERR_PTR(-ENOMEM); } - if (of_property_read_u32(np, "x-size", &pdata->x_max)) { - dev_err(dev, "failed to get x-size property\n"); - return ERR_PTR(-EINVAL); - } - - if (of_property_read_u32(np, "y-size", &pdata->y_max)) { - dev_err(dev, "failed to get y-size property\n"); - return ERR_PTR(-EINVAL); - } + of_property_read_u32(np, "x-size", &pdata->x_max); + of_property_read_u32(np, "y-size", &pdata->y_max); return pdata; } @@ -856,6 +848,12 @@ static int zforce_probe(struct i2c_client *client) input_set_abs_params(input_dev, ABS_MT_POSITION_Y, 0, pdata->y_max, 0, 0); + touchscreen_parse_properties(input_dev, true, &ts->prop); + if (ts->prop.max_x == 0 || ts->prop.max_y == 0) { + dev_err(&client->dev, "no size specified\n"); + return -EINVAL; + } + input_set_abs_params(input_dev, 
ABS_MT_TOUCH_MAJOR, 0, ZFORCE_MAX_AREA, 0, 0); input_set_abs_params(input_dev, ABS_MT_TOUCH_MINOR, 0, diff --git a/drivers/input/vivaldi-fmap.c b/drivers/input/vivaldi-fmap.c index 6dae83d968067e775b20824a11180a6073c931ba..0d29ec014e2f9b8d8dc53f6ce1981cb850480995 100644 --- a/drivers/input/vivaldi-fmap.c +++ b/drivers/input/vivaldi-fmap.c @@ -27,10 +27,10 @@ ssize_t vivaldi_function_row_physmap_show(const struct vivaldi_data *data, return 0; for (i = 0; i < data->num_function_row_keys; i++) - size += scnprintf(buf + size, PAGE_SIZE - size, - "%s%02X", size ? " " : "", physmap[i]); + size += sysfs_emit_at(buf, size, + "%s%02X", size ? " " : "", physmap[i]); if (size) - size += scnprintf(buf + size, PAGE_SIZE - size, "\n"); + size += sysfs_emit_at(buf, size, "\n"); return size; } diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 7673bb82945b6cbf08ee1d4d44c196dc46875c5a..9a29d742617e3d34e3c9a28d25857230b06d2a68 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -160,6 +160,7 @@ config IOMMU_DMA # Shared Virtual Addressing config IOMMU_SVA + select IOMMU_MM_DATA bool config FSL_PAMU diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 86be1edd50ee9afe027addc7c0c1fcbb4f536de5..8b3601f285fd699dd4d9d4d32e1ac62c2c5e3058 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -53,10 +53,16 @@ int amd_iommu_pdev_enable_cap_pri(struct pci_dev *pdev); void amd_iommu_pdev_disable_cap_pri(struct pci_dev *pdev); int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid, u64 address); +/* + * This function flushes all internal caches of + * the IOMMU used by this driver. 
+ */ +void amd_iommu_flush_all_caches(struct amd_iommu *iommu); void amd_iommu_update_and_flush_device_table(struct protection_domain *domain); void amd_iommu_domain_update(struct protection_domain *domain); void amd_iommu_domain_flush_complete(struct protection_domain *domain); -void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain); +void amd_iommu_domain_flush_pages(struct protection_domain *domain, + u64 address, size_t size); int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid); int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid, unsigned long cr3); diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 90b7d7950a9efa032f116a448db3c96fb7570004..809d74faa1a5d1513b12e8f964575d212996a35a 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -902,12 +902,6 @@ extern int amd_iommu_max_glx_val; extern u64 amd_iommu_efr; extern u64 amd_iommu_efr2; -/* - * This function flushes all internal caches of - * the IOMMU used by this driver. 
- */ -void iommu_flush_all_caches(struct amd_iommu *iommu); - static inline int get_ioapic_devid(int id) { struct devid_map *entry; diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 64bcf3df37ee5e3813f2bc71d19c6c18e9540cd2..c83bd0c2a1c9214df007c7ac29e31641adf23702 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -2223,7 +2223,7 @@ static int __init amd_iommu_init_pci(void) init_device_table_dma(pci_seg); for_each_iommu(iommu) - iommu_flush_all_caches(iommu); + amd_iommu_flush_all_caches(iommu); print_iommu_info(); @@ -2773,7 +2773,7 @@ static void early_enable_iommu(struct amd_iommu *iommu) iommu_enable_xt(iommu); iommu_enable_irtcachedis(iommu); iommu_enable(iommu); - iommu_flush_all_caches(iommu); + amd_iommu_flush_all_caches(iommu); } /* @@ -2829,7 +2829,7 @@ static void early_enable_iommus(void) iommu_enable_xt(iommu); iommu_enable_irtcachedis(iommu); iommu_set_device_table(iommu); - iommu_flush_all_caches(iommu); + amd_iommu_flush_all_caches(iommu); } } } @@ -3293,7 +3293,7 @@ static int __init state_next(void) uninit_device_table_dma(pci_seg); for_each_iommu(iommu) - iommu_flush_all_caches(iommu); + amd_iommu_flush_all_caches(iommu); } } return ret; diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index 6c0621f6f572a4c4c0fb72ea1bdb5abe9d504311..2a0d1e97e52fdfe2375c3988c698260ae520cb9f 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -369,6 +369,8 @@ static int iommu_v1_map_pages(struct io_pgtable_ops *ops, unsigned long iova, bool updated = false; u64 __pte, *pte; int ret, i, count; + size_t size = pgcount << __ffs(pgsize); + unsigned long o_iova = iova; BUG_ON(!IS_ALIGNED(iova, pgsize)); BUG_ON(!IS_ALIGNED(paddr, pgsize)); @@ -424,8 +426,7 @@ out: * Updates and flushing already happened in * increase_address_space(). 
*/ - amd_iommu_domain_flush_tlb_pde(dom); - amd_iommu_domain_flush_complete(dom); + amd_iommu_domain_flush_pages(dom, o_iova, size); spin_unlock_irqrestore(&dom->lock, flags); } diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c index f818a7e254d42627ebbd2d3154e290263af37421..6d69ba60744f063462ae8205284004de78d95ed2 100644 --- a/drivers/iommu/amd/io_pgtable_v2.c +++ b/drivers/iommu/amd/io_pgtable_v2.c @@ -244,7 +244,6 @@ static int iommu_v2_map_pages(struct io_pgtable_ops *ops, unsigned long iova, unsigned long mapped_size = 0; unsigned long o_iova = iova; size_t size = pgcount << __ffs(pgsize); - int count = 0; int ret = 0; bool updated = false; @@ -265,19 +264,14 @@ static int iommu_v2_map_pages(struct io_pgtable_ops *ops, unsigned long iova, *pte = set_pte_attr(paddr, map_size, prot); - count++; iova += map_size; paddr += map_size; mapped_size += map_size; } out: - if (updated) { - if (count > 1) - amd_iommu_flush_tlb(&pdom->domain, 0); - else - amd_iommu_flush_page(&pdom->domain, 0, o_iova); - } + if (updated) + amd_iommu_domain_flush_pages(pdom, o_iova, size); if (mapped) *mapped += mapped_size; diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index b9a0523cbb0a5cf4de628db0f13128599513be0c..4283dd8191f053e2c879fccd136f336eef5bb962 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -64,7 +64,7 @@ LIST_HEAD(hpet_map); LIST_HEAD(acpihid_map); const struct iommu_ops amd_iommu_ops; -const struct iommu_dirty_ops amd_dirty_ops; +static const struct iommu_dirty_ops amd_dirty_ops; int amd_iommu_max_glx_val = -1; @@ -85,6 +85,11 @@ static void detach_device(struct device *dev); * ****************************************************************************/ +static inline bool pdom_is_v2_pgtbl_mode(struct protection_domain *pdom) +{ + return (pdom && (pdom->flags & PD_IOMMUV2_MASK)); +} + static inline int get_acpihid_device_id(struct device *dev, struct acpihid_map_entry **entry) { @@ -551,8 +556,6 
@@ static void amd_iommu_uninit_device(struct device *dev) if (dev_data->domain) detach_device(dev); - dev_iommu_priv_set(dev, NULL); - /* * We keep dev_data around for unplugged devices and reuse it when the * device is re-plugged - not doing so would introduce a ton of races. @@ -1124,68 +1127,44 @@ static inline u64 build_inv_address(u64 address, size_t size) } static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, - size_t size, u16 domid, int pde) + size_t size, u16 domid, + ioasid_t pasid, bool gn) { u64 inv_address = build_inv_address(address, size); memset(cmd, 0, sizeof(*cmd)); + cmd->data[1] |= domid; cmd->data[2] = lower_32_bits(inv_address); cmd->data[3] = upper_32_bits(inv_address); + /* PDE bit - we want to flush everything, not only the PTEs */ + cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; + if (gn) { + cmd->data[0] |= pasid; + cmd->data[2] |= CMD_INV_IOMMU_PAGES_GN_MASK; + } CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES); - if (pde) /* PDE bit - we want to flush everything, not only the PTEs */ - cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; } static void build_inv_iotlb_pages(struct iommu_cmd *cmd, u16 devid, int qdep, - u64 address, size_t size) + u64 address, size_t size, + ioasid_t pasid, bool gn) { u64 inv_address = build_inv_address(address, size); memset(cmd, 0, sizeof(*cmd)); + cmd->data[0] = devid; cmd->data[0] |= (qdep & 0xff) << 24; cmd->data[1] = devid; cmd->data[2] = lower_32_bits(inv_address); cmd->data[3] = upper_32_bits(inv_address); - CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES); -} - -static void build_inv_iommu_pasid(struct iommu_cmd *cmd, u16 domid, u32 pasid, - u64 address, bool size) -{ - memset(cmd, 0, sizeof(*cmd)); - - address &= ~(0xfffULL); - - cmd->data[0] = pasid; - cmd->data[1] = domid; - cmd->data[2] = lower_32_bits(address); - cmd->data[3] = upper_32_bits(address); - cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; - cmd->data[2] |= CMD_INV_IOMMU_PAGES_GN_MASK; - if (size) - cmd->data[2] |= 
CMD_INV_IOMMU_PAGES_SIZE_MASK; - CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES); -} - -static void build_inv_iotlb_pasid(struct iommu_cmd *cmd, u16 devid, u32 pasid, - int qdep, u64 address, bool size) -{ - memset(cmd, 0, sizeof(*cmd)); - - address &= ~(0xfffULL); + if (gn) { + cmd->data[0] |= ((pasid >> 8) & 0xff) << 16; + cmd->data[1] |= (pasid & 0xff) << 16; + cmd->data[2] |= CMD_INV_IOMMU_PAGES_GN_MASK; + } - cmd->data[0] = devid; - cmd->data[0] |= ((pasid >> 8) & 0xff) << 16; - cmd->data[0] |= (qdep & 0xff) << 24; - cmd->data[1] = devid; - cmd->data[1] |= (pasid & 0xff) << 16; - cmd->data[2] = lower_32_bits(address); - cmd->data[2] |= CMD_INV_IOMMU_PAGES_GN_MASK; - cmd->data[3] = upper_32_bits(address); - if (size) - cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES); } @@ -1341,7 +1320,7 @@ static void amd_iommu_flush_tlb_all(struct amd_iommu *iommu) for (dom_id = 0; dom_id <= last_bdf; ++dom_id) { struct iommu_cmd cmd; build_inv_iommu_pages(&cmd, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, - dom_id, 1); + dom_id, IOMMU_NO_PASID, false); iommu_queue_command(iommu, &cmd); } @@ -1353,7 +1332,7 @@ static void amd_iommu_flush_tlb_domid(struct amd_iommu *iommu, u32 dom_id) struct iommu_cmd cmd; build_inv_iommu_pages(&cmd, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, - dom_id, 1); + dom_id, IOMMU_NO_PASID, false); iommu_queue_command(iommu, &cmd); iommu_completion_wait(iommu); @@ -1392,7 +1371,7 @@ static void amd_iommu_flush_irt_all(struct amd_iommu *iommu) iommu_completion_wait(iommu); } -void iommu_flush_all_caches(struct amd_iommu *iommu) +void amd_iommu_flush_all_caches(struct amd_iommu *iommu) { if (check_feature(FEATURE_IA)) { amd_iommu_flush_all(iommu); @@ -1406,8 +1385,8 @@ void iommu_flush_all_caches(struct amd_iommu *iommu) /* * Command send function for flushing on-device TLB */ -static int device_flush_iotlb(struct iommu_dev_data *dev_data, - u64 address, size_t size) +static int device_flush_iotlb(struct iommu_dev_data *dev_data, u64 
address, + size_t size, ioasid_t pasid, bool gn) { struct amd_iommu *iommu; struct iommu_cmd cmd; @@ -1418,7 +1397,8 @@ static int device_flush_iotlb(struct iommu_dev_data *dev_data, if (!iommu) return -EINVAL; - build_inv_iotlb_pages(&cmd, dev_data->devid, qdep, address, size); + build_inv_iotlb_pages(&cmd, dev_data->devid, qdep, address, + size, pasid, gn); return iommu_queue_command(iommu, &cmd); } @@ -1464,8 +1444,11 @@ static int device_flush_dte(struct iommu_dev_data *dev_data) return ret; } - if (dev_data->ats_enabled) - ret = device_flush_iotlb(dev_data, 0, ~0UL); + if (dev_data->ats_enabled) { + /* Invalidate the entire contents of an IOTLB */ + ret = device_flush_iotlb(dev_data, 0, ~0UL, + IOMMU_NO_PASID, false); + } return ret; } @@ -1476,13 +1459,18 @@ static int device_flush_dte(struct iommu_dev_data *dev_data) * page. Otherwise it flushes the whole TLB of the IOMMU. */ static void __domain_flush_pages(struct protection_domain *domain, - u64 address, size_t size, int pde) + u64 address, size_t size) { struct iommu_dev_data *dev_data; struct iommu_cmd cmd; int ret = 0, i; + ioasid_t pasid = IOMMU_NO_PASID; + bool gn = false; + + if (pdom_is_v2_pgtbl_mode(domain)) + gn = true; - build_inv_iommu_pages(&cmd, address, size, domain->id, pde); + build_inv_iommu_pages(&cmd, address, size, domain->id, pasid, gn); for (i = 0; i < amd_iommu_get_num_iommus(); ++i) { if (!domain->dev_iommu[i]) @@ -1500,17 +1488,21 @@ static void __domain_flush_pages(struct protection_domain *domain, if (!dev_data->ats_enabled) continue; - ret |= device_flush_iotlb(dev_data, address, size); + ret |= device_flush_iotlb(dev_data, address, size, pasid, gn); } WARN_ON(ret); } -static void domain_flush_pages(struct protection_domain *domain, - u64 address, size_t size, int pde) +void amd_iommu_domain_flush_pages(struct protection_domain *domain, + u64 address, size_t size) { if (likely(!amd_iommu_np_cache)) { - __domain_flush_pages(domain, address, size, pde); + 
__domain_flush_pages(domain, address, size); + + /* Wait until IOMMU TLB and all device IOTLB flushes are complete */ + amd_iommu_domain_flush_complete(domain); + return; } @@ -1543,16 +1535,20 @@ static void domain_flush_pages(struct protection_domain *domain, flush_size = 1ul << min_alignment; - __domain_flush_pages(domain, address, flush_size, pde); + __domain_flush_pages(domain, address, flush_size); address += flush_size; size -= flush_size; } + + /* Wait until IOMMU TLB and all device IOTLB flushes are complete */ + amd_iommu_domain_flush_complete(domain); } /* Flush the whole IO/TLB for a given protection domain - including PDE */ -void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain) +static void amd_iommu_domain_flush_all(struct protection_domain *domain) { - domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1); + amd_iommu_domain_flush_pages(domain, 0, + CMD_INV_IOMMU_ALL_PAGES_ADDRESS); } void amd_iommu_domain_flush_complete(struct protection_domain *domain) @@ -1579,8 +1575,7 @@ static void domain_flush_np_cache(struct protection_domain *domain, unsigned long flags; spin_lock_irqsave(&domain->lock, flags); - domain_flush_pages(domain, iova, size, 1); - amd_iommu_domain_flush_complete(domain); + amd_iommu_domain_flush_pages(domain, iova, size); spin_unlock_irqrestore(&domain->lock, flags); } } @@ -1858,11 +1853,8 @@ static void do_detach(struct iommu_dev_data *dev_data) /* Flush the DTE entry */ device_flush_dte(dev_data); - /* Flush IOTLB */ - amd_iommu_domain_flush_tlb_pde(domain); - - /* Wait for the flushes to finish */ - amd_iommu_domain_flush_complete(domain); + /* Flush IOTLB and wait for the flushes to finish */ + amd_iommu_domain_flush_all(domain); /* decrease reference counters - needs to happen after the flushes */ domain->dev_iommu[iommu->index] -= 1; @@ -1896,15 +1888,6 @@ static int attach_device(struct device *dev, do_attach(dev_data, domain); - /* - * We might boot into a crash-kernel here. 
The crashed kernel - * left the caches in the IOMMU dirty. So we have to flush - * here to evict all dirty stuff. - */ - amd_iommu_domain_flush_tlb_pde(domain); - - amd_iommu_domain_flush_complete(domain); - out: spin_unlock(&dev_data->lock); @@ -2048,8 +2031,7 @@ void amd_iommu_domain_update(struct protection_domain *domain) amd_iommu_update_and_flush_device_table(domain); /* Flush domain TLB(s) and wait for completion */ - amd_iommu_domain_flush_tlb_pde(domain); - amd_iommu_domain_flush_complete(domain); + amd_iommu_domain_flush_all(domain); } /***************************************************************************** @@ -2482,10 +2464,9 @@ static int amd_iommu_set_dirty_tracking(struct iommu_domain *domain, } /* Flush IOTLB to mark IOPTE dirty on the next translation(s) */ - if (domain_flush) { - amd_iommu_domain_flush_tlb_pde(pdomain); - amd_iommu_domain_flush_complete(pdomain); - } + if (domain_flush) + amd_iommu_domain_flush_all(pdomain); + pdomain->dirty_tracking = enable; spin_unlock_irqrestore(&pdomain->lock, flags); @@ -2588,8 +2569,7 @@ static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain) unsigned long flags; spin_lock_irqsave(&dom->lock, flags); - amd_iommu_domain_flush_tlb_pde(dom); - amd_iommu_domain_flush_complete(dom); + amd_iommu_domain_flush_all(dom); spin_unlock_irqrestore(&dom->lock, flags); } @@ -2600,8 +2580,8 @@ static void amd_iommu_iotlb_sync(struct iommu_domain *domain, unsigned long flags; spin_lock_irqsave(&dom->lock, flags); - domain_flush_pages(dom, gather->start, gather->end - gather->start + 1, 1); - amd_iommu_domain_flush_complete(dom); + amd_iommu_domain_flush_pages(dom, gather->start, + gather->end - gather->start + 1); spin_unlock_irqrestore(&dom->lock, flags); } @@ -2635,7 +2615,7 @@ static bool amd_iommu_enforce_cache_coherency(struct iommu_domain *domain) return true; } -const struct iommu_dirty_ops amd_dirty_ops = { +static const struct iommu_dirty_ops amd_dirty_ops = { .set_dirty_tracking = 
amd_iommu_set_dirty_tracking, .read_and_clear_dirty = amd_iommu_read_and_clear_dirty, }; @@ -2666,7 +2646,7 @@ const struct iommu_ops amd_iommu_ops = { }; static int __flush_pasid(struct protection_domain *domain, u32 pasid, - u64 address, bool size) + u64 address, size_t size) { struct iommu_dev_data *dev_data; struct iommu_cmd cmd; @@ -2675,7 +2655,7 @@ static int __flush_pasid(struct protection_domain *domain, u32 pasid, if (!(domain->flags & PD_IOMMUV2_MASK)) return -EINVAL; - build_inv_iommu_pasid(&cmd, domain->id, pasid, address, size); + build_inv_iommu_pages(&cmd, address, size, domain->id, pasid, true); /* * IOMMU TLB needs to be flushed before Device TLB to @@ -2709,8 +2689,8 @@ static int __flush_pasid(struct protection_domain *domain, u32 pasid, iommu = rlookup_amd_iommu(dev_data->dev); if (!iommu) continue; - build_inv_iotlb_pasid(&cmd, dev_data->devid, pasid, - qdep, address, size); + build_inv_iotlb_pages(&cmd, dev_data->devid, qdep, + address, size, pasid, true); ret = iommu_queue_command(iommu, &cmd); if (ret != 0) @@ -2730,7 +2710,7 @@ out: static int __amd_iommu_flush_page(struct protection_domain *domain, u32 pasid, u64 address) { - return __flush_pasid(domain, pasid, address, false); + return __flush_pasid(domain, pasid, address, PAGE_SIZE); } int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid, @@ -2749,8 +2729,7 @@ int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid, static int __amd_iommu_flush_tlb(struct protection_domain *domain, u32 pasid) { - return __flush_pasid(domain, pasid, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, - true); + return __flush_pasid(domain, pasid, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS); } int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid) @@ -3111,8 +3090,8 @@ out: return index; } -static int modify_irte_ga(struct amd_iommu *iommu, u16 devid, int index, - struct irte_ga *irte) +static int __modify_irte_ga(struct amd_iommu *iommu, u16 devid, int index, + struct irte_ga *irte) { struct irq_remap_table 
*table; struct irte_ga *entry; @@ -3139,6 +3118,18 @@ static int modify_irte_ga(struct amd_iommu *iommu, u16 devid, int index, raw_spin_unlock_irqrestore(&table->lock, flags); + return 0; +} + +static int modify_irte_ga(struct amd_iommu *iommu, u16 devid, int index, + struct irte_ga *irte) +{ + int ret; + + ret = __modify_irte_ga(iommu, devid, index, irte); + if (ret) + return ret; + iommu_flush_irt_and_complete(iommu, devid); return 0; @@ -3822,8 +3813,8 @@ int amd_iommu_update_ga(int cpu, bool is_run, void *data) } entry->lo.fields_vapic.is_run = is_run; - return modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid, - ir_data->irq_2_irte.index, entry); + return __modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid, + ir_data->irq_2_irte.index, entry); } EXPORT_SYMBOL(amd_iommu_update_ga); #endif diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c index ee05f4824bfad1d6515fd506e9c1c2fd6760e18f..ef3ee95706dac0997d18f9de6598f529df66064c 100644 --- a/drivers/iommu/apple-dart.c +++ b/drivers/iommu/apple-dart.c @@ -81,6 +81,7 @@ #define DART_T8020_TCR_BYPASS_DAPF BIT(12) #define DART_T8020_TTBR 0x200 +#define DART_T8020_USB4_TTBR 0x400 #define DART_T8020_TTBR_VALID BIT(31) #define DART_T8020_TTBR_ADDR_FIELD_SHIFT 0 #define DART_T8020_TTBR_SHIFT 12 @@ -368,12 +369,14 @@ apple_dart_t8020_hw_stream_command(struct apple_dart_stream_map *stream_map, u32 command) { unsigned long flags; - int ret; + int ret, i; u32 command_reg; spin_lock_irqsave(&stream_map->dart->lock, flags); - writel(stream_map->sidmap[0], stream_map->dart->regs + DART_T8020_STREAM_SELECT); + for (i = 0; i < BITS_TO_U32(stream_map->dart->num_streams); i++) + writel(stream_map->sidmap[i], + stream_map->dart->regs + DART_T8020_STREAM_SELECT + 4 * i); writel(command, stream_map->dart->regs + DART_T8020_STREAM_COMMAND); ret = readl_poll_timeout_atomic( @@ -740,7 +743,6 @@ static void apple_dart_release_device(struct device *dev) { struct apple_dart_master_cfg *cfg =
dev_iommu_priv_get(dev); - dev_iommu_priv_set(dev, NULL); kfree(cfg); } @@ -908,7 +910,7 @@ static struct iommu_group *apple_dart_device_group(struct device *dev) ret = apple_dart_merge_master_cfg(group_master_cfg, cfg); if (ret) { - dev_err(dev, "Failed to merge DART IOMMU grups.\n"); + dev_err(dev, "Failed to merge DART IOMMU groups.\n"); iommu_group_put(group); res = ERR_PTR(ret); goto out; @@ -1215,6 +1217,33 @@ static const struct apple_dart_hw apple_dart_hw_t8103 = { .ttbr_shift = DART_T8020_TTBR_SHIFT, .ttbr_count = 4, }; + +static const struct apple_dart_hw apple_dart_hw_t8103_usb4 = { + .type = DART_T8020, + .irq_handler = apple_dart_t8020_irq, + .invalidate_tlb = apple_dart_t8020_hw_invalidate_tlb, + .oas = 36, + .fmt = APPLE_DART, + .max_sid_count = 64, + + .enable_streams = DART_T8020_STREAMS_ENABLE, + .lock = DART_T8020_CONFIG, + .lock_bit = DART_T8020_CONFIG_LOCK, + + .error = DART_T8020_ERROR, + + .tcr = DART_T8020_TCR, + .tcr_enabled = DART_T8020_TCR_TRANSLATE_ENABLE, + .tcr_disabled = 0, + .tcr_bypass = 0, + + .ttbr = DART_T8020_USB4_TTBR, + .ttbr_valid = DART_T8020_TTBR_VALID, + .ttbr_addr_field_shift = DART_T8020_TTBR_ADDR_FIELD_SHIFT, + .ttbr_shift = DART_T8020_TTBR_SHIFT, + .ttbr_count = 4, +}; + static const struct apple_dart_hw apple_dart_hw_t6000 = { .type = DART_T6000, .irq_handler = apple_dart_t8020_irq, @@ -1272,7 +1301,7 @@ static __maybe_unused int apple_dart_suspend(struct device *dev) unsigned int sid, idx; for (sid = 0; sid < dart->num_streams; sid++) { - dart->save_tcr[sid] = readl_relaxed(dart->regs + DART_TCR(dart, sid)); + dart->save_tcr[sid] = readl(dart->regs + DART_TCR(dart, sid)); for (idx = 0; idx < dart->hw->ttbr_count; idx++) dart->save_ttbr[sid][idx] = readl(dart->regs + DART_TTBR(dart, sid, idx)); @@ -1307,6 +1336,7 @@ static DEFINE_SIMPLE_DEV_PM_OPS(apple_dart_pm_ops, apple_dart_suspend, apple_dar static const struct of_device_id apple_dart_of_match[] = { { .compatible = "apple,t8103-dart", .data = &apple_dart_hw_t8103 
}, + { .compatible = "apple,t8103-usb4-dart", .data = &apple_dart_hw_t8103_usb4 }, { .compatible = "apple,t8110-dart", .data = &apple_dart_hw_t8110 }, { .compatible = "apple,t6000-dart", .data = &apple_dart_hw_t6000 }, {}, diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index 353248ab18e76d3ab1f07c894cfb903f7e424b83..05722121f00e70689680ce7a45cc5e953f50210b 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -246,7 +246,8 @@ static void arm_smmu_mm_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn, smmu_domain); } - arm_smmu_atc_inv_domain(smmu_domain, mm->pasid, start, size); + arm_smmu_atc_inv_domain(smmu_domain, mm_get_enqcmd_pasid(mm), start, + size); } static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) @@ -264,10 +265,11 @@ static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) * DMA may still be running. Keep the cd valid to avoid C_BAD_CD events, * but disable translation. 
*/ - arm_smmu_update_ctx_desc_devices(smmu_domain, mm->pasid, &quiet_cd); + arm_smmu_update_ctx_desc_devices(smmu_domain, mm_get_enqcmd_pasid(mm), + &quiet_cd); arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_mn->cd->asid); - arm_smmu_atc_inv_domain(smmu_domain, mm->pasid, 0, 0); + arm_smmu_atc_inv_domain(smmu_domain, mm_get_enqcmd_pasid(mm), 0, 0); smmu_mn->cleared = true; mutex_unlock(&sva_lock); @@ -325,10 +327,13 @@ arm_smmu_mmu_notifier_get(struct arm_smmu_domain *smmu_domain, spin_lock_irqsave(&smmu_domain->devices_lock, flags); list_for_each_entry(master, &smmu_domain->devices, domain_head) { - ret = arm_smmu_write_ctx_desc(master, mm->pasid, cd); + ret = arm_smmu_write_ctx_desc(master, mm_get_enqcmd_pasid(mm), + cd); if (ret) { - list_for_each_entry_from_reverse(master, &smmu_domain->devices, domain_head) - arm_smmu_write_ctx_desc(master, mm->pasid, NULL); + list_for_each_entry_from_reverse( + master, &smmu_domain->devices, domain_head) + arm_smmu_write_ctx_desc( + master, mm_get_enqcmd_pasid(mm), NULL); break; } } @@ -358,7 +363,8 @@ static void arm_smmu_mmu_notifier_put(struct arm_smmu_mmu_notifier *smmu_mn) list_del(&smmu_mn->list); - arm_smmu_update_ctx_desc_devices(smmu_domain, mm->pasid, NULL); + arm_smmu_update_ctx_desc_devices(smmu_domain, mm_get_enqcmd_pasid(mm), + NULL); /* * If we went through clear(), we've already invalidated, and no @@ -366,7 +372,8 @@ static void arm_smmu_mmu_notifier_put(struct arm_smmu_mmu_notifier *smmu_mn) */ if (!smmu_mn->cleared) { arm_smmu_tlb_inv_asid(smmu_domain->smmu, cd->asid); - arm_smmu_atc_inv_domain(smmu_domain, mm->pasid, 0, 0); + arm_smmu_atc_inv_domain(smmu_domain, mm_get_enqcmd_pasid(mm), 0, + 0); } /* Frees smmu_mn */ diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 7445454c2af244f03b9274db12e3e4dd325e31ab..0ffb1cf17e0b2e6687b1c5ff12ff87405e2552b6 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1063,6 +1063,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid, bool cd_live; __le64 *cdptr; struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; + struct arm_smmu_device *smmu = master->smmu; if (WARN_ON(ssid >= (1 << cd_table->s1cdmax))) return -E2BIG; @@ -1077,6 +1078,8 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid, if (!cd) { /* (5) */ val = 0; } else if (cd == &quiet_cd) { /* (4) */ + if (!(smmu->features & ARM_SMMU_FEAT_STALL_FORCE)) + val &= ~(CTXDESC_CD_0_S | CTXDESC_CD_0_R); val |= CTXDESC_CD_0_TCR_EPD0; } else if (cd_live) { /* (3) */ val &= ~CTXDESC_CD_0_ASID; @@ -1249,7 +1252,7 @@ static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid) } static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, - __le64 *dst) + struct arm_smmu_ste *dst) { /* * This is hideously complicated, but we only really care about @@ -1267,12 +1270,12 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, * 2. Write everything apart from dword 0, sync, write dword 0, sync * 3. 
Update Config, sync */ - u64 val = le64_to_cpu(dst[0]); + u64 val = le64_to_cpu(dst->data[0]); bool ste_live = false; - struct arm_smmu_device *smmu = NULL; + struct arm_smmu_device *smmu = master->smmu; struct arm_smmu_ctx_desc_cfg *cd_table = NULL; struct arm_smmu_s2_cfg *s2_cfg = NULL; - struct arm_smmu_domain *smmu_domain = NULL; + struct arm_smmu_domain *smmu_domain = master->domain; struct arm_smmu_cmdq_ent prefetch_cmd = { .opcode = CMDQ_OP_PREFETCH_CFG, .prefetch = { @@ -1280,18 +1283,12 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, }, }; - if (master) { - smmu_domain = master->domain; - smmu = master->smmu; - } - if (smmu_domain) { switch (smmu_domain->stage) { case ARM_SMMU_DOMAIN_S1: cd_table = &master->cd_table; break; case ARM_SMMU_DOMAIN_S2: - case ARM_SMMU_DOMAIN_NESTED: s2_cfg = &smmu_domain->s2_cfg; break; default: @@ -1325,10 +1322,10 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, else val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS); - dst[0] = cpu_to_le64(val); - dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG, + dst->data[0] = cpu_to_le64(val); + dst->data[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING)); - dst[2] = 0; /* Nuke the VMID */ + dst->data[2] = 0; /* Nuke the VMID */ /* * The SMMU can perform negative caching, so we must sync * the STE regardless of whether the old value was live. 
@@ -1343,7 +1340,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1; BUG_ON(ste_live); - dst[1] = cpu_to_le64( + dst->data[1] = cpu_to_le64( FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) | FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) | FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) | @@ -1352,7 +1349,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, if (smmu->features & ARM_SMMU_FEAT_STALLS && !master->stall_enabled) - dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD); + dst->data[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD); val |= (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) | FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) | @@ -1362,7 +1359,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, if (s2_cfg) { BUG_ON(ste_live); - dst[2] = cpu_to_le64( + dst->data[2] = cpu_to_le64( FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) | FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) | #ifdef __BIG_ENDIAN @@ -1371,18 +1368,18 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2R); - dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK); + dst->data[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK); val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS); } if (master->ats_enabled) - dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS, + dst->data[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS, STRTAB_STE_1_EATS_TRANS)); arm_smmu_sync_ste_for_sid(smmu, sid); /* See comment in arm_smmu_write_ctx_desc() */ - WRITE_ONCE(dst[0], cpu_to_le64(val)); + WRITE_ONCE(dst->data[0], cpu_to_le64(val)); arm_smmu_sync_ste_for_sid(smmu, sid); /* It's likely that we'll want to use the new STE soon */ @@ -1390,7 +1387,8 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, 
arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd); } -static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent, bool force) +static void arm_smmu_init_bypass_stes(struct arm_smmu_ste *strtab, + unsigned int nent, bool force) { unsigned int i; u64 val = STRTAB_STE_0_V; @@ -1401,11 +1399,11 @@ static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent, bool fo val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS); for (i = 0; i < nent; ++i) { - strtab[0] = cpu_to_le64(val); - strtab[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG, - STRTAB_STE_1_SHCFG_INCOMING)); - strtab[2] = 0; - strtab += STRTAB_STE_DWORDS; + strtab->data[0] = cpu_to_le64(val); + strtab->data[1] = cpu_to_le64(FIELD_PREP( + STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING)); + strtab->data[2] = 0; + strtab++; } } @@ -2171,7 +2169,6 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain) fmt = ARM_64_LPAE_S1; finalise_stage_fn = arm_smmu_domain_finalise_s1; break; - case ARM_SMMU_DOMAIN_NESTED: case ARM_SMMU_DOMAIN_S2: ias = smmu->ias; oas = smmu->oas; @@ -2209,26 +2206,23 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain) return 0; } -static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid) +static struct arm_smmu_ste * +arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid) { - __le64 *step; struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) { - struct arm_smmu_strtab_l1_desc *l1_desc; - int idx; + unsigned int idx1, idx2; /* Two-level walk */ - idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS; - l1_desc = &cfg->l1_desc[idx]; - idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS; - step = &l1_desc->l2ptr[idx]; + idx1 = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS; + idx2 = sid & ((1 << STRTAB_SPLIT) - 1); + return &cfg->l1_desc[idx1].l2ptr[idx2]; } else { /* Simple linear lookup */ - step = &cfg->strtab[sid * STRTAB_STE_DWORDS]; + return (struct 
arm_smmu_ste *)&cfg + ->strtab[sid * STRTAB_STE_DWORDS]; } - - return step; } static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master) @@ -2238,7 +2232,8 @@ static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master) for (i = 0; i < master->num_streams; ++i) { u32 sid = master->streams[i].id; - __le64 *step = arm_smmu_get_step_for_sid(smmu, sid); + struct arm_smmu_ste *step = + arm_smmu_get_step_for_sid(smmu, sid); /* Bridged PCI devices may end up with duplicated IDs */ for (j = 0; j < i; j++) @@ -2649,9 +2644,6 @@ static struct iommu_device *arm_smmu_probe_device(struct device *dev) struct arm_smmu_master *master; struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); - if (!fwspec || fwspec->ops != &arm_smmu_ops) - return ERR_PTR(-ENODEV); - if (WARN_ON_ONCE(dev_iommu_priv_get(dev))) return ERR_PTR(-EBUSY); @@ -2698,7 +2690,6 @@ static struct iommu_device *arm_smmu_probe_device(struct device *dev) err_free_master: kfree(master); - dev_iommu_priv_set(dev, NULL); return ERR_PTR(ret); } @@ -2742,7 +2733,7 @@ static int arm_smmu_enable_nesting(struct iommu_domain *domain) if (smmu_domain->smmu) ret = -EPERM; else - smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED; + smmu_domain->stage = ARM_SMMU_DOMAIN_S2; mutex_unlock(&smmu_domain->init_mutex); return ret; @@ -3769,7 +3760,7 @@ static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu) iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list); list_for_each_entry(e, &rmr_list, list) { - __le64 *step; + struct arm_smmu_ste *step; struct iommu_iort_rmr_data *rmr; int ret, i; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 925ac6a47bceb7b15cd184741d478dc3675d5f09..65fb388d51734d677bf6112090c6eaf94bf5966e 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -206,6 +206,11 @@ #define STRTAB_L1_DESC_L2PTR_MASK GENMASK_ULL(51, 6) #define STRTAB_STE_DWORDS 8 + +struct 
arm_smmu_ste { + __le64 data[STRTAB_STE_DWORDS]; +}; + #define STRTAB_STE_0_V (1UL << 0) #define STRTAB_STE_0_CFG GENMASK_ULL(3, 1) #define STRTAB_STE_0_CFG_ABORT 0 @@ -571,7 +576,7 @@ struct arm_smmu_priq { struct arm_smmu_strtab_l1_desc { u8 span; - __le64 *l2ptr; + struct arm_smmu_ste *l2ptr; dma_addr_t l2ptr_dma; }; @@ -710,7 +715,6 @@ struct arm_smmu_master { enum arm_smmu_domain_stage { ARM_SMMU_DOMAIN_S1 = 0, ARM_SMMU_DOMAIN_S2, - ARM_SMMU_DOMAIN_NESTED, ARM_SMMU_DOMAIN_BYPASS, }; diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 549ae4dba3a681b08832d00bf5057f7d803fc06c..8b04ece00420dd4ed61a63053802f4ffe8248099 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -243,8 +243,10 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain, static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = { { .compatible = "qcom,adreno" }, + { .compatible = "qcom,adreno-gmu" }, { .compatible = "qcom,mdp4" }, { .compatible = "qcom,mdss" }, + { .compatible = "qcom,qcm2290-mdss" }, { .compatible = "qcom,sc7180-mdss" }, { .compatible = "qcom,sc7180-mss-pil" }, { .compatible = "qcom,sc7280-mdss" }, diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index d6d1a2a55cc0692fb02f0f58b901ac438c71604c..68b6bc5e7c71016b8d58a6a077e921b27fb51447 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -82,6 +82,23 @@ static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu) pm_runtime_put_autosuspend(smmu->dev); } +static void arm_smmu_rpm_use_autosuspend(struct arm_smmu_device *smmu) +{ + /* + * Setup an autosuspend delay to avoid bouncing runpm state. + * Otherwise, if a driver for a suspended consumer device + * unmaps buffers, it will runpm resume/suspend for each one. 
+ * + * For example, when used by a GPU device, when an application + * or game exits, it can trigger unmapping 100s or 1000s of + * buffers. With a runpm cycle for each buffer, that adds up + * to 5-10sec worth of reprogramming the context bank, while + * the system appears to be locked up to the user. + */ + pm_runtime_set_autosuspend_delay(smmu->dev, 20); + pm_runtime_use_autosuspend(smmu->dev); +} + static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom) { return container_of(dom, struct arm_smmu_domain, domain); @@ -392,8 +409,7 @@ static irqreturn_t arm_smmu_context_fault(int irq, void *dev) { u32 fsr, fsynr, cbfrsynra; unsigned long iova; - struct iommu_domain *domain = dev; - struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + struct arm_smmu_domain *smmu_domain = dev; struct arm_smmu_device *smmu = smmu_domain->smmu; int idx = smmu_domain->cfg.cbndx; int ret; @@ -406,7 +422,7 @@ static irqreturn_t arm_smmu_context_fault(int irq, void *dev) iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR); cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx)); - ret = report_iommu_fault(domain, NULL, iova, + ret = report_iommu_fault(&smmu_domain->domain, NULL, iova, fsynr & ARM_SMMU_FSYNR0_WNR ? 
IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); if (ret == -ENOSYS) @@ -607,7 +623,7 @@ static int arm_smmu_alloc_context_bank(struct arm_smmu_domain *smmu_domain, return __arm_smmu_alloc_bitmap(smmu->context_map, start, smmu->num_context_banks); } -static int arm_smmu_init_domain_context(struct iommu_domain *domain, +static int arm_smmu_init_domain_context(struct arm_smmu_domain *smmu_domain, struct arm_smmu_device *smmu, struct device *dev) { @@ -616,7 +632,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, struct io_pgtable_ops *pgtbl_ops; struct io_pgtable_cfg pgtbl_cfg; enum io_pgtable_fmt fmt; - struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + struct iommu_domain *domain = &smmu_domain->domain; struct arm_smmu_cfg *cfg = &smmu_domain->cfg; irqreturn_t (*context_fault)(int irq, void *dev); @@ -624,12 +640,6 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, if (smmu_domain->smmu) goto out_unlock; - if (domain->type == IOMMU_DOMAIN_IDENTITY) { - smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS; - smmu_domain->smmu = smmu; - goto out_unlock; - } - /* * Mapping the requested stage onto what we support is surprisingly * complicated, mainly because the spec allows S1+S2 SMMUs without @@ -796,8 +806,8 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, else context_fault = arm_smmu_context_fault; - ret = devm_request_irq(smmu->dev, irq, context_fault, - IRQF_SHARED, "arm-smmu-context-fault", domain); + ret = devm_request_irq(smmu->dev, irq, context_fault, IRQF_SHARED, + "arm-smmu-context-fault", smmu_domain); if (ret < 0) { dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n", cfg->irptndx, irq); @@ -818,14 +828,13 @@ out_unlock: return ret; } -static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) +static void arm_smmu_destroy_domain_context(struct arm_smmu_domain *smmu_domain) { - struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_device *smmu = 
smmu_domain->smmu; struct arm_smmu_cfg *cfg = &smmu_domain->cfg; int ret, irq; - if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY) + if (!smmu) return; ret = arm_smmu_rpm_get(smmu); @@ -841,7 +850,7 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) if (cfg->irptndx != ARM_SMMU_INVALID_IRPTNDX) { irq = smmu->irqs[cfg->irptndx]; - devm_free_irq(smmu->dev, irq, domain); + devm_free_irq(smmu->dev, irq, smmu_domain); } free_io_pgtable_ops(smmu_domain->pgtbl_ops); @@ -850,14 +859,10 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) arm_smmu_rpm_put(smmu); } -static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) +static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev) { struct arm_smmu_domain *smmu_domain; - if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_IDENTITY) { - if (using_legacy_binding || type != IOMMU_DOMAIN_DMA) - return NULL; - } /* * Allocate the domain and initialise some of its data structures. * We can't really do anything meaningful until we've added a @@ -870,6 +875,15 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) mutex_init(&smmu_domain->init_mutex); spin_lock_init(&smmu_domain->cb_lock); + if (dev) { + struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev); + + if (arm_smmu_init_domain_context(smmu_domain, cfg->smmu, dev)) { + kfree(smmu_domain); + return NULL; + } + } + return &smmu_domain->domain; } @@ -881,7 +895,7 @@ static void arm_smmu_domain_free(struct iommu_domain *domain) * Free the domain resources. We assume that all devices have * already been detached. 
*/ - arm_smmu_destroy_domain_context(domain); + arm_smmu_destroy_domain_context(smmu_domain); kfree(smmu_domain); } @@ -1081,21 +1095,14 @@ static void arm_smmu_master_free_smes(struct arm_smmu_master_cfg *cfg, mutex_unlock(&smmu->stream_map_mutex); } -static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain, - struct arm_smmu_master_cfg *cfg, - struct iommu_fwspec *fwspec) +static void arm_smmu_master_install_s2crs(struct arm_smmu_master_cfg *cfg, + enum arm_smmu_s2cr_type type, + u8 cbndx, struct iommu_fwspec *fwspec) { - struct arm_smmu_device *smmu = smmu_domain->smmu; + struct arm_smmu_device *smmu = cfg->smmu; struct arm_smmu_s2cr *s2cr = smmu->s2crs; - u8 cbndx = smmu_domain->cfg.cbndx; - enum arm_smmu_s2cr_type type; int i, idx; - if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS) - type = S2CR_TYPE_BYPASS; - else - type = S2CR_TYPE_TRANS; - for_each_cfg_sme(cfg, fwspec, i, idx) { if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx) continue; @@ -1105,7 +1112,6 @@ static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain, s2cr[idx].cbndx = cbndx; arm_smmu_write_s2cr(smmu, idx); } - return 0; } static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) @@ -1116,11 +1122,6 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) struct arm_smmu_device *smmu; int ret; - if (!fwspec || fwspec->ops != &arm_smmu_ops) { - dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n"); - return -ENXIO; - } - /* * FIXME: The arch/arm DMA API code tries to attach devices to its own * domains between of_xlate() and probe_device() - we have no way to cope @@ -1139,7 +1140,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) return ret; /* Ensure that the domain is finalised */ - ret = arm_smmu_init_domain_context(domain, smmu, dev); + ret = arm_smmu_init_domain_context(smmu_domain, smmu, dev); if (ret < 0) goto rpm_put; @@ -1153,27 +1154,66 @@ static int 
arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) } /* Looks ok, so add the device to the domain */ - ret = arm_smmu_domain_add_master(smmu_domain, cfg, fwspec); - - /* - * Setup an autosuspend delay to avoid bouncing runpm state. - * Otherwise, if a driver for a suspended consumer device - * unmaps buffers, it will runpm resume/suspend for each one. - * - * For example, when used by a GPU device, when an application - * or game exits, it can trigger unmapping 100s or 1000s of - * buffers. With a runpm cycle for each buffer, that adds up - * to 5-10sec worth of reprogramming the context bank, while - * the system appears to be locked up to the user. - */ - pm_runtime_set_autosuspend_delay(smmu->dev, 20); - pm_runtime_use_autosuspend(smmu->dev); - + arm_smmu_master_install_s2crs(cfg, S2CR_TYPE_TRANS, + smmu_domain->cfg.cbndx, fwspec); + arm_smmu_rpm_use_autosuspend(smmu); rpm_put: arm_smmu_rpm_put(smmu); return ret; } +static int arm_smmu_attach_dev_type(struct device *dev, + enum arm_smmu_s2cr_type type) +{ + struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev); + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + struct arm_smmu_device *smmu; + int ret; + + if (!cfg) + return -ENODEV; + smmu = cfg->smmu; + + ret = arm_smmu_rpm_get(smmu); + if (ret < 0) + return ret; + + arm_smmu_master_install_s2crs(cfg, type, 0, fwspec); + arm_smmu_rpm_use_autosuspend(smmu); + arm_smmu_rpm_put(smmu); + return 0; +} + +static int arm_smmu_attach_dev_identity(struct iommu_domain *domain, + struct device *dev) +{ + return arm_smmu_attach_dev_type(dev, S2CR_TYPE_BYPASS); +} + +static const struct iommu_domain_ops arm_smmu_identity_ops = { + .attach_dev = arm_smmu_attach_dev_identity, +}; + +static struct iommu_domain arm_smmu_identity_domain = { + .type = IOMMU_DOMAIN_IDENTITY, + .ops = &arm_smmu_identity_ops, +}; + +static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain, + struct device *dev) +{ + return arm_smmu_attach_dev_type(dev, 
S2CR_TYPE_FAULT); +} + +static const struct iommu_domain_ops arm_smmu_blocked_ops = { + .attach_dev = arm_smmu_attach_dev_blocked, +}; + +static struct iommu_domain arm_smmu_blocked_domain = { + .type = IOMMU_DOMAIN_BLOCKED, + .ops = &arm_smmu_blocked_ops, +}; + static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped) @@ -1357,10 +1397,8 @@ static struct iommu_device *arm_smmu_probe_device(struct device *dev) fwspec = dev_iommu_fwspec_get(dev); if (ret) goto out_free; - } else if (fwspec && fwspec->ops == &arm_smmu_ops) { - smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode); } else { - return ERR_PTR(-ENODEV); + smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode); } ret = -EINVAL; @@ -1427,7 +1465,6 @@ static void arm_smmu_release_device(struct device *dev) arm_smmu_rpm_put(cfg->smmu); - dev_iommu_priv_set(dev, NULL); kfree(cfg); } @@ -1560,8 +1597,10 @@ static int arm_smmu_def_domain_type(struct device *dev) } static struct iommu_ops arm_smmu_ops = { + .identity_domain = &arm_smmu_identity_domain, + .blocked_domain = &arm_smmu_blocked_domain, .capable = arm_smmu_capable, - .domain_alloc = arm_smmu_domain_alloc, + .domain_alloc_paging = arm_smmu_domain_alloc_paging, .probe_device = arm_smmu_probe_device, .release_device = arm_smmu_release_device, .probe_finalize = arm_smmu_probe_finalize, @@ -2161,7 +2200,8 @@ static int arm_smmu_device_probe(struct platform_device *pdev) return err; } - err = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev); + err = iommu_device_register(&smmu->iommu, &arm_smmu_ops, + using_legacy_binding ? 
NULL : dev); if (err) { dev_err(dev, "Failed to register iommu\n"); iommu_device_sysfs_remove(&smmu->iommu); diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h index 703fd5817ec11f401e9eed0286c39faa655204ee..836ed6799a801fda916207cabb8b289604352b1f 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h @@ -361,7 +361,6 @@ enum arm_smmu_domain_stage { ARM_SMMU_DOMAIN_S1 = 0, ARM_SMMU_DOMAIN_S2, ARM_SMMU_DOMAIN_NESTED, - ARM_SMMU_DOMAIN_BYPASS, }; struct arm_smmu_domain { diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c index 97b2122032b2371915047aa03d3118005fc49496..17a1c163fef660397f9e39f252563952cf9bd10a 100644 --- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c +++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c @@ -79,16 +79,6 @@ static struct qcom_iommu_domain *to_qcom_iommu_domain(struct iommu_domain *dom) static const struct iommu_ops qcom_iommu_ops; -static struct qcom_iommu_dev * to_iommu(struct device *dev) -{ - struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); - - if (!fwspec || fwspec->ops != &qcom_iommu_ops) - return NULL; - - return dev_iommu_priv_get(dev); -} - static struct qcom_iommu_ctx * to_ctx(struct qcom_iommu_domain *d, unsigned asid) { struct qcom_iommu_dev *qcom_iommu = d->iommu; @@ -372,7 +362,7 @@ static void qcom_iommu_domain_free(struct iommu_domain *domain) static int qcom_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) { - struct qcom_iommu_dev *qcom_iommu = to_iommu(dev); + struct qcom_iommu_dev *qcom_iommu = dev_iommu_priv_get(dev); struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain); int ret; @@ -404,7 +394,7 @@ static int qcom_iommu_identity_attach(struct iommu_domain *identity_domain, struct iommu_domain *domain = iommu_get_domain_for_dev(dev); struct qcom_iommu_domain *qcom_domain; struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); - struct qcom_iommu_dev *qcom_iommu = 
to_iommu(dev); + struct qcom_iommu_dev *qcom_iommu = dev_iommu_priv_get(dev); unsigned int i; if (domain == identity_domain || !domain) @@ -535,7 +525,7 @@ static bool qcom_iommu_capable(struct device *dev, enum iommu_cap cap) static struct iommu_device *qcom_iommu_probe_device(struct device *dev) { - struct qcom_iommu_dev *qcom_iommu = to_iommu(dev); + struct qcom_iommu_dev *qcom_iommu = dev_iommu_priv_get(dev); struct device_link *link; if (!qcom_iommu) @@ -900,8 +890,16 @@ static void qcom_iommu_device_remove(struct platform_device *pdev) static int __maybe_unused qcom_iommu_resume(struct device *dev) { struct qcom_iommu_dev *qcom_iommu = dev_get_drvdata(dev); + int ret; + + ret = clk_bulk_prepare_enable(CLK_NUM, qcom_iommu->clks); + if (ret < 0) + return ret; + + if (dev->pm_domain) + return qcom_scm_restore_sec_cfg(qcom_iommu->sec_id, 0); - return clk_bulk_prepare_enable(CLK_NUM, qcom_iommu->clks); + return ret; } static int __maybe_unused qcom_iommu_suspend(struct device *dev) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 5bffc248af0249e808c8f0d5b40b284170f497fe..4a90d80182026af30c05a8fa970f386e6b04f9a4 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include "dma-iommu.h" @@ -1160,6 +1161,8 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, return DMA_MAPPING_ERROR; } + trace_swiotlb_bounced(dev, phys, size); + aligned_size = iova_align(iovad, size); phys = swiotlb_tbl_map_single(dev, phys, size, aligned_size, iova_mask(iovad), dir, attrs); diff --git a/drivers/iommu/intel/debugfs.c b/drivers/iommu/intel/debugfs.c index dee61e513be6d44f72d5b2c7071bf4a30e696e5f..86b506af7daa1418e70fb87bded66181248a1ec4 100644 --- a/drivers/iommu/intel/debugfs.c +++ b/drivers/iommu/intel/debugfs.c @@ -106,9 +106,6 @@ static const struct iommu_regset iommu_regs_64[] = { IOMMU_REGSET_ENTRY(MTRR_PHYSMASK8), 
IOMMU_REGSET_ENTRY(MTRR_PHYSBASE9), IOMMU_REGSET_ENTRY(MTRR_PHYSMASK9), - IOMMU_REGSET_ENTRY(VCCAP), - IOMMU_REGSET_ENTRY(VCMD), - IOMMU_REGSET_ENTRY(VCRSP), }; static struct dentry *intel_iommu_debug; diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 897159dba47de4f863f57b365bd025cda0c8cd57..6fb5f6fceea11fb7865d92d8451a5de98a655556 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -46,9 +46,6 @@ #define DEFAULT_DOMAIN_ADDRESS_WIDTH 57 -#define MAX_AGAW_WIDTH 64 -#define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT) - #define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << ((gaw) - VTD_PAGE_SHIFT)) - 1) #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << (gaw)) - 1) @@ -63,74 +60,6 @@ #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) -/* page table handling */ -#define LEVEL_STRIDE (9) -#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1) - -static inline int agaw_to_level(int agaw) -{ - return agaw + 2; -} - -static inline int agaw_to_width(int agaw) -{ - return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH); -} - -static inline int width_to_agaw(int width) -{ - return DIV_ROUND_UP(width - 30, LEVEL_STRIDE); -} - -static inline unsigned int level_to_offset_bits(int level) -{ - return (level - 1) * LEVEL_STRIDE; -} - -static inline int pfn_level_offset(u64 pfn, int level) -{ - return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK; -} - -static inline u64 level_mask(int level) -{ - return -1ULL << level_to_offset_bits(level); -} - -static inline u64 level_size(int level) -{ - return 1ULL << level_to_offset_bits(level); -} - -static inline u64 align_to_level(u64 pfn, int level) -{ - return (pfn + level_size(level) - 1) & level_mask(level); -} - -static inline unsigned long lvl_to_nr_pages(unsigned int lvl) -{ - return 1UL << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH); -} - -/* VT-d pages must always be _smaller_ than MM pages. Otherwise things - are never going to work. 
*/ -static inline unsigned long mm_to_dma_pfn_start(unsigned long mm_pfn) -{ - return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT); -} -static inline unsigned long mm_to_dma_pfn_end(unsigned long mm_pfn) -{ - return ((mm_pfn + 1) << (PAGE_SHIFT - VTD_PAGE_SHIFT)) - 1; -} -static inline unsigned long page_to_dma_pfn(struct page *pg) -{ - return mm_to_dma_pfn_start(page_to_pfn(pg)); -} -static inline unsigned long virt_to_dma_pfn(void *p) -{ - return page_to_dma_pfn(virt_to_page(p)); -} - static void __init check_tylersburg_isoch(void); static int rwbf_quirk; @@ -168,78 +97,6 @@ static phys_addr_t root_entry_uctp(struct root_entry *re) return re->hi & VTD_PAGE_MASK; } -static inline void context_set_present(struct context_entry *context) -{ - context->lo |= 1; -} - -static inline void context_set_fault_enable(struct context_entry *context) -{ - context->lo &= (((u64)-1) << 2) | 1; -} - -static inline void context_set_translation_type(struct context_entry *context, - unsigned long value) -{ - context->lo &= (((u64)-1) << 4) | 3; - context->lo |= (value & 3) << 2; -} - -static inline void context_set_address_root(struct context_entry *context, - unsigned long value) -{ - context->lo &= ~VTD_PAGE_MASK; - context->lo |= value & VTD_PAGE_MASK; -} - -static inline void context_set_address_width(struct context_entry *context, - unsigned long value) -{ - context->hi |= value & 7; -} - -static inline void context_set_domain_id(struct context_entry *context, - unsigned long value) -{ - context->hi |= (value & ((1 << 16) - 1)) << 8; -} - -static inline void context_set_pasid(struct context_entry *context) -{ - context->lo |= CONTEXT_PASIDE; -} - -static inline int context_domain_id(struct context_entry *c) -{ - return((c->hi >> 8) & 0xffff); -} - -static inline void context_clear_entry(struct context_entry *context) -{ - context->lo = 0; - context->hi = 0; -} - -static inline bool context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn) -{ - if (!iommu->copied_tables) - return 
false; - - return test_bit(((long)bus << 8) | devfn, iommu->copied_tables); -} - -static inline void -set_context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn) -{ - set_bit(((long)bus << 8) | devfn, iommu->copied_tables); -} - -static inline void -clear_context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn) -{ - clear_bit(((long)bus << 8) | devfn, iommu->copied_tables); -} - /* * This domain is a statically identity mapping domain. * 1. This domain creats a static 1:1 mapping to all usable memory. @@ -383,13 +240,12 @@ void free_pgtable_page(void *vaddr) free_page((unsigned long)vaddr); } -static inline int domain_type_is_si(struct dmar_domain *domain) +static int domain_type_is_si(struct dmar_domain *domain) { return domain->domain.type == IOMMU_DOMAIN_IDENTITY; } -static inline int domain_pfn_supported(struct dmar_domain *domain, - unsigned long pfn) +static int domain_pfn_supported(struct dmar_domain *domain, unsigned long pfn) { int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; @@ -451,7 +307,7 @@ int iommu_calculate_agaw(struct intel_iommu *iommu) return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH); } -static inline bool iommu_paging_structure_coherency(struct intel_iommu *iommu) +static bool iommu_paging_structure_coherency(struct intel_iommu *iommu) { return sm_supported(iommu) ? 
ecap_smpwc(iommu->ecap) : ecap_coherent(iommu->ecap); @@ -703,7 +559,7 @@ static bool iommu_is_dummy(struct intel_iommu *iommu, struct device *dev) return false; } -struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn) +static struct intel_iommu *device_lookup_iommu(struct device *dev, u8 *bus, u8 *devfn) { struct dmar_drhd_unit *drhd = NULL; struct pci_dev *pdev = NULL; @@ -1574,9 +1430,8 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, } /* Notification for newly created mappings */ -static inline void __mapping_notify_one(struct intel_iommu *iommu, - struct dmar_domain *domain, - unsigned long pfn, unsigned int pages) +static void __mapping_notify_one(struct intel_iommu *iommu, struct dmar_domain *domain, + unsigned long pfn, unsigned int pages) { /* * It's a non-present to present mapping. Only flush if caching mode @@ -1843,7 +1698,7 @@ void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu) spin_unlock(&iommu->lock); } -static inline int guestwidth_to_adjustwidth(int gaw) +static int guestwidth_to_adjustwidth(int gaw) { int agaw; int r = (gaw - 12) % 9; @@ -1877,7 +1732,7 @@ static void domain_exit(struct dmar_domain *domain) * Value of X in the PDTS field of a scalable mode context entry * indicates PASID directory with 2^(X + 7) entries. */ -static inline unsigned long context_get_sm_pds(struct pasid_table *table) +static unsigned long context_get_sm_pds(struct pasid_table *table) { unsigned long pds, max_pde; @@ -1889,38 +1744,6 @@ static inline unsigned long context_get_sm_pds(struct pasid_table *table) return pds - 7; } -/* - * Set the RID_PASID field of a scalable mode context entry. The - * IOMMU hardware will use the PASID value set in this field for - * DMA translations of DMA requests without PASID. 
- */ -static inline void -context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid) -{ - context->hi |= pasid & ((1 << 20) - 1); -} - -/* - * Set the DTE(Device-TLB Enable) field of a scalable mode context - * entry. - */ -static inline void context_set_sm_dte(struct context_entry *context) -{ - context->lo |= BIT_ULL(2); -} - -/* - * Set the PRE(Page Request Enable) field of a scalable mode context - * entry. - */ -static inline void context_set_sm_pre(struct context_entry *context) -{ - context->lo |= BIT_ULL(4); -} - -/* Convert value to context PASID directory size field coding. */ -#define context_pdts(pds) (((pds) & 0x7) << 9) - static int domain_context_mapping_one(struct dmar_domain *domain, struct intel_iommu *iommu, struct pasid_table *table, @@ -2081,14 +1904,11 @@ static int domain_context_mapping_cb(struct pci_dev *pdev, static int domain_context_mapping(struct dmar_domain *domain, struct device *dev) { + struct device_domain_info *info = dev_iommu_priv_get(dev); struct domain_context_mapping_data data; + struct intel_iommu *iommu = info->iommu; + u8 bus = info->bus, devfn = info->devfn; struct pasid_table *table; - struct intel_iommu *iommu; - u8 bus, devfn; - - iommu = device_to_iommu(dev, &bus, &devfn); - if (!iommu) - return -ENODEV; table = intel_pasid_get_table(dev); @@ -2105,18 +1925,15 @@ domain_context_mapping(struct dmar_domain *domain, struct device *dev) } /* Returns a number of VTD pages, but aligned to MM page size */ -static inline unsigned long aligned_nrpages(unsigned long host_addr, - size_t size) +static unsigned long aligned_nrpages(unsigned long host_addr, size_t size) { host_addr &= ~PAGE_MASK; return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT; } /* Return largest possible superpage level for a given mapping */ -static inline int hardware_largepage_caps(struct dmar_domain *domain, - unsigned long iov_pfn, - unsigned long phy_pfn, - unsigned long pages) +static int hardware_largepage_caps(struct dmar_domain 
*domain, unsigned long iov_pfn, + unsigned long phy_pfn, unsigned long pages) { int support, level = 1; unsigned long pfnmerge; @@ -2449,15 +2266,10 @@ static int dmar_domain_attach_device(struct dmar_domain *domain, struct device *dev) { struct device_domain_info *info = dev_iommu_priv_get(dev); - struct intel_iommu *iommu; + struct intel_iommu *iommu = info->iommu; unsigned long flags; - u8 bus, devfn; int ret; - iommu = device_to_iommu(dev, &bus, &devfn); - if (!iommu) - return -ENODEV; - ret = domain_attach_iommu(domain, iommu); if (ret) return ret; @@ -2470,7 +2282,7 @@ static int dmar_domain_attach_device(struct dmar_domain *domain, if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) { /* Setup the PASID entry for requests without PASID: */ if (hw_pass_through && domain_type_is_si(domain)) - ret = intel_pasid_setup_pass_through(iommu, domain, + ret = intel_pasid_setup_pass_through(iommu, dev, IOMMU_NO_PASID); else if (domain->use_first_level) ret = domain_setup_first_level(iommu, domain, dev, @@ -3615,7 +3427,7 @@ void intel_iommu_shutdown(void) up_write(&dmar_global_lock); } -static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev) +static struct intel_iommu *dev_to_intel_iommu(struct device *dev) { struct iommu_device *iommu_dev = dev_to_iommu_device(dev); @@ -3694,7 +3506,7 @@ const struct attribute_group *intel_iommu_groups[] = { NULL, }; -static inline bool has_external_pci(void) +static bool has_external_pci(void) { struct pci_dev *pdev = NULL; @@ -4119,14 +3931,11 @@ static void intel_iommu_domain_free(struct iommu_domain *domain) int prepare_domain_attach_device(struct iommu_domain *domain, struct device *dev) { + struct device_domain_info *info = dev_iommu_priv_get(dev); struct dmar_domain *dmar_domain = to_dmar_domain(domain); - struct intel_iommu *iommu; + struct intel_iommu *iommu = info->iommu; int addr_width; - iommu = device_to_iommu(dev, NULL, NULL); - if (!iommu) - return -ENODEV; - if (dmar_domain->force_snooping && 
!ecap_sc_support(iommu->ecap)) return -EINVAL; @@ -4403,7 +4212,7 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) u8 bus, devfn; int ret; - iommu = device_to_iommu(dev, &bus, &devfn); + iommu = device_lookup_iommu(dev, &bus, &devfn); if (!iommu || !iommu->iommu.ops) return ERR_PTR(-ENODEV); @@ -4461,7 +4270,6 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) ret = intel_pasid_alloc_table(dev); if (ret) { dev_err(dev, "PASID table allocation failed\n"); - dev_iommu_priv_set(dev, NULL); kfree(info); return ERR_PTR(ret); } @@ -4479,7 +4287,6 @@ static void intel_iommu_release_device(struct device *dev) dmar_remove_one_dev_info(dev); intel_pasid_free_table(dev); intel_iommu_debugfs_remove_dev(info); - dev_iommu_priv_set(dev, NULL); kfree(info); set_dma_ops(dev, NULL); } @@ -4739,8 +4546,9 @@ static int intel_iommu_iotlb_sync_map(struct iommu_domain *domain, static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid) { - struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); + struct device_domain_info *info = dev_iommu_priv_get(dev); struct dev_pasid_info *curr, *dev_pasid = NULL; + struct intel_iommu *iommu = info->iommu; struct dmar_domain *dmar_domain; struct iommu_domain *domain; unsigned long flags; @@ -4811,8 +4619,7 @@ static int intel_iommu_set_dev_pasid(struct iommu_domain *domain, goto out_free; if (domain_type_is_si(dmar_domain)) - ret = intel_pasid_setup_pass_through(iommu, dmar_domain, - dev, pasid); + ret = intel_pasid_setup_pass_through(iommu, dev, pasid); else if (dmar_domain->use_first_level) ret = domain_setup_first_level(iommu, dmar_domain, dev, pasid); diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index ce030c5b5772abadabd3f4a11fb1e443ffe80599..d02f916d8e59a914d2441fa2b81af9ac31dfbf86 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -140,9 +140,6 @@ #define DMAR_ECEO_REG 0x408 #define DMAR_ECRSP_REG 0x410 #define 
DMAR_ECCAP_REG 0x430 -#define DMAR_VCCAP_REG 0xe30 /* Virtual command capability register */ -#define DMAR_VCMD_REG 0xe00 /* Virtual command register */ -#define DMAR_VCRSP_REG 0xe10 /* Virtual command response register */ #define DMAR_IQER_REG_IQEI(reg) FIELD_GET(GENMASK_ULL(3, 0), reg) #define DMAR_IQER_REG_ITESID(reg) FIELD_GET(GENMASK_ULL(47, 32), reg) @@ -854,6 +851,181 @@ static inline bool context_present(struct context_entry *context) return (context->lo & 1); } +#define LEVEL_STRIDE (9) +#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1) +#define MAX_AGAW_WIDTH (64) +#define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT) + +static inline int agaw_to_level(int agaw) +{ + return agaw + 2; +} + +static inline int agaw_to_width(int agaw) +{ + return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH); +} + +static inline int width_to_agaw(int width) +{ + return DIV_ROUND_UP(width - 30, LEVEL_STRIDE); +} + +static inline unsigned int level_to_offset_bits(int level) +{ + return (level - 1) * LEVEL_STRIDE; +} + +static inline int pfn_level_offset(u64 pfn, int level) +{ + return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK; +} + +static inline u64 level_mask(int level) +{ + return -1ULL << level_to_offset_bits(level); +} + +static inline u64 level_size(int level) +{ + return 1ULL << level_to_offset_bits(level); +} + +static inline u64 align_to_level(u64 pfn, int level) +{ + return (pfn + level_size(level) - 1) & level_mask(level); +} + +static inline unsigned long lvl_to_nr_pages(unsigned int lvl) +{ + return 1UL << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH); +} + +/* VT-d pages must always be _smaller_ than MM pages. Otherwise things + are never going to work. 
*/ +static inline unsigned long mm_to_dma_pfn_start(unsigned long mm_pfn) +{ + return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT); +} +static inline unsigned long mm_to_dma_pfn_end(unsigned long mm_pfn) +{ + return ((mm_pfn + 1) << (PAGE_SHIFT - VTD_PAGE_SHIFT)) - 1; +} +static inline unsigned long page_to_dma_pfn(struct page *pg) +{ + return mm_to_dma_pfn_start(page_to_pfn(pg)); +} +static inline unsigned long virt_to_dma_pfn(void *p) +{ + return page_to_dma_pfn(virt_to_page(p)); +} + +static inline void context_set_present(struct context_entry *context) +{ + context->lo |= 1; +} + +static inline void context_set_fault_enable(struct context_entry *context) +{ + context->lo &= (((u64)-1) << 2) | 1; +} + +static inline void context_set_translation_type(struct context_entry *context, + unsigned long value) +{ + context->lo &= (((u64)-1) << 4) | 3; + context->lo |= (value & 3) << 2; +} + +static inline void context_set_address_root(struct context_entry *context, + unsigned long value) +{ + context->lo &= ~VTD_PAGE_MASK; + context->lo |= value & VTD_PAGE_MASK; +} + +static inline void context_set_address_width(struct context_entry *context, + unsigned long value) +{ + context->hi |= value & 7; +} + +static inline void context_set_domain_id(struct context_entry *context, + unsigned long value) +{ + context->hi |= (value & ((1 << 16) - 1)) << 8; +} + +static inline void context_set_pasid(struct context_entry *context) +{ + context->lo |= CONTEXT_PASIDE; +} + +static inline int context_domain_id(struct context_entry *c) +{ + return((c->hi >> 8) & 0xffff); +} + +static inline void context_clear_entry(struct context_entry *context) +{ + context->lo = 0; + context->hi = 0; +} + +#ifdef CONFIG_INTEL_IOMMU +static inline bool context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn) +{ + if (!iommu->copied_tables) + return false; + + return test_bit(((long)bus << 8) | devfn, iommu->copied_tables); +} + +static inline void +set_context_copied(struct intel_iommu *iommu, u8 bus, u8 
devfn) +{ + set_bit(((long)bus << 8) | devfn, iommu->copied_tables); +} + +static inline void +clear_context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn) +{ + clear_bit(((long)bus << 8) | devfn, iommu->copied_tables); +} +#endif /* CONFIG_INTEL_IOMMU */ + +/* + * Set the RID_PASID field of a scalable mode context entry. The + * IOMMU hardware will use the PASID value set in this field for + * DMA translations of DMA requests without PASID. + */ +static inline void +context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid) +{ + context->hi |= pasid & ((1 << 20) - 1); +} + +/* + * Set the DTE(Device-TLB Enable) field of a scalable mode context + * entry. + */ +static inline void context_set_sm_dte(struct context_entry *context) +{ + context->lo |= BIT_ULL(2); +} + +/* + * Set the PRE(Page Request Enable) field of a scalable mode context + * entry. + */ +static inline void context_set_sm_pre(struct context_entry *context) +{ + context->lo |= BIT_ULL(4); +} + +/* Convert value to context PASID directory size field coding. 
*/ +#define context_pdts(pds) (((pds) & 0x7) << 9) + struct dmar_drhd_unit *dmar_find_matched_drhd_unit(struct pci_dev *dev); int dmar_enable_qi(struct intel_iommu *iommu); @@ -900,7 +1072,6 @@ int dmar_ir_support(void); void *alloc_pgtable_page(int node, gfp_t gfp); void free_pgtable_page(void *vaddr); void iommu_flush_write_buffer(struct intel_iommu *iommu); -struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn); struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain *parent, const struct iommu_user_data *user_data); diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c index b5a5563ab32c6bbb3a2e780bb010875b95dd29f4..f26c7f1c46ccaf43b0a4db5209b5c85b484277ed 100644 --- a/drivers/iommu/intel/nested.c +++ b/drivers/iommu/intel/nested.c @@ -73,9 +73,97 @@ static void intel_nested_domain_free(struct iommu_domain *domain) kfree(to_dmar_domain(domain)); } +static void nested_flush_dev_iotlb(struct dmar_domain *domain, u64 addr, + unsigned int mask) +{ + struct device_domain_info *info; + unsigned long flags; + u16 sid, qdep; + + spin_lock_irqsave(&domain->lock, flags); + list_for_each_entry(info, &domain->devices, link) { + if (!info->ats_enabled) + continue; + sid = info->bus << 8 | info->devfn; + qdep = info->ats_qdep; + qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, + qdep, addr, mask); + quirk_extra_dev_tlb_flush(info, addr, mask, + IOMMU_NO_PASID, qdep); + } + spin_unlock_irqrestore(&domain->lock, flags); +} + +static void intel_nested_flush_cache(struct dmar_domain *domain, u64 addr, + unsigned long npages, bool ih) +{ + struct iommu_domain_info *info; + unsigned int mask; + unsigned long i; + + xa_for_each(&domain->iommu_array, i, info) + qi_flush_piotlb(info->iommu, + domain_id_iommu(domain, info->iommu), + IOMMU_NO_PASID, addr, npages, ih); + + if (!domain->has_iotlb_device) + return; + + if (npages == U64_MAX) + mask = 64 - VTD_PAGE_SHIFT; + else + mask = ilog2(__roundup_pow_of_two(npages)); + + 
nested_flush_dev_iotlb(domain, addr, mask); +} + +static int intel_nested_cache_invalidate_user(struct iommu_domain *domain, + struct iommu_user_data_array *array) +{ + struct dmar_domain *dmar_domain = to_dmar_domain(domain); + struct iommu_hwpt_vtd_s1_invalidate inv_entry; + u32 index, processed = 0; + int ret = 0; + + if (array->type != IOMMU_HWPT_INVALIDATE_DATA_VTD_S1) { + ret = -EINVAL; + goto out; + } + + for (index = 0; index < array->entry_num; index++) { + ret = iommu_copy_struct_from_user_array(&inv_entry, array, + IOMMU_HWPT_INVALIDATE_DATA_VTD_S1, + index, __reserved); + if (ret) + break; + + if ((inv_entry.flags & ~IOMMU_VTD_INV_FLAGS_LEAF) || + inv_entry.__reserved) { + ret = -EOPNOTSUPP; + break; + } + + if (!IS_ALIGNED(inv_entry.addr, VTD_PAGE_SIZE) || + ((inv_entry.npages == U64_MAX) && inv_entry.addr)) { + ret = -EINVAL; + break; + } + + intel_nested_flush_cache(dmar_domain, inv_entry.addr, + inv_entry.npages, + inv_entry.flags & IOMMU_VTD_INV_FLAGS_LEAF); + processed++; + } + +out: + array->entry_num = processed; + return ret; +} + static const struct iommu_domain_ops intel_nested_domain_ops = { .attach_dev = intel_nested_attach_dev, .free = intel_nested_domain_free, + .cache_invalidate_user = intel_nested_cache_invalidate_user, }; struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain *parent, diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index 74e8e4c17e81430f216fa88a9575d591de203e3a..3239cefa4c337897dda048ebec7aeb1fc075a955 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -26,63 +26,6 @@ */ u32 intel_pasid_max_id = PASID_MAX; -int vcmd_alloc_pasid(struct intel_iommu *iommu, u32 *pasid) -{ - unsigned long flags; - u8 status_code; - int ret = 0; - u64 res; - - raw_spin_lock_irqsave(&iommu->register_lock, flags); - dmar_writeq(iommu->reg + DMAR_VCMD_REG, VCMD_CMD_ALLOC); - IOMMU_WAIT_OP(iommu, DMAR_VCRSP_REG, dmar_readq, - !(res & VCMD_VRSP_IP), res); - 
raw_spin_unlock_irqrestore(&iommu->register_lock, flags); - - status_code = VCMD_VRSP_SC(res); - switch (status_code) { - case VCMD_VRSP_SC_SUCCESS: - *pasid = VCMD_VRSP_RESULT_PASID(res); - break; - case VCMD_VRSP_SC_NO_PASID_AVAIL: - pr_info("IOMMU: %s: No PASID available\n", iommu->name); - ret = -ENOSPC; - break; - default: - ret = -ENODEV; - pr_warn("IOMMU: %s: Unexpected error code %d\n", - iommu->name, status_code); - } - - return ret; -} - -void vcmd_free_pasid(struct intel_iommu *iommu, u32 pasid) -{ - unsigned long flags; - u8 status_code; - u64 res; - - raw_spin_lock_irqsave(&iommu->register_lock, flags); - dmar_writeq(iommu->reg + DMAR_VCMD_REG, - VCMD_CMD_OPERAND(pasid) | VCMD_CMD_FREE); - IOMMU_WAIT_OP(iommu, DMAR_VCRSP_REG, dmar_readq, - !(res & VCMD_VRSP_IP), res); - raw_spin_unlock_irqrestore(&iommu->register_lock, flags); - - status_code = VCMD_VRSP_SC(res); - switch (status_code) { - case VCMD_VRSP_SC_SUCCESS: - break; - case VCMD_VRSP_SC_INVALID_PASID: - pr_info("IOMMU: %s: Invalid PASID\n", iommu->name); - break; - default: - pr_warn("IOMMU: %s: Unexpected error code %d\n", - iommu->name, status_code); - } -} - /* * Per device pasid table management: */ @@ -230,30 +173,6 @@ retry: /* * Interfaces for PASID table entry manipulation: */ -static inline void pasid_clear_entry(struct pasid_entry *pe) -{ - WRITE_ONCE(pe->val[0], 0); - WRITE_ONCE(pe->val[1], 0); - WRITE_ONCE(pe->val[2], 0); - WRITE_ONCE(pe->val[3], 0); - WRITE_ONCE(pe->val[4], 0); - WRITE_ONCE(pe->val[5], 0); - WRITE_ONCE(pe->val[6], 0); - WRITE_ONCE(pe->val[7], 0); -} - -static inline void pasid_clear_entry_with_fpd(struct pasid_entry *pe) -{ - WRITE_ONCE(pe->val[0], PASID_PTE_FPD); - WRITE_ONCE(pe->val[1], 0); - WRITE_ONCE(pe->val[2], 0); - WRITE_ONCE(pe->val[3], 0); - WRITE_ONCE(pe->val[4], 0); - WRITE_ONCE(pe->val[5], 0); - WRITE_ONCE(pe->val[6], 0); - WRITE_ONCE(pe->val[7], 0); -} - static void intel_pasid_clear_entry(struct device *dev, u32 pasid, bool fault_ignore) { @@ 
-269,192 +188,6 @@ intel_pasid_clear_entry(struct device *dev, u32 pasid, bool fault_ignore) pasid_clear_entry(pe); } -static inline void pasid_set_bits(u64 *ptr, u64 mask, u64 bits) -{ - u64 old; - - old = READ_ONCE(*ptr); - WRITE_ONCE(*ptr, (old & ~mask) | bits); -} - -static inline u64 pasid_get_bits(u64 *ptr) -{ - return READ_ONCE(*ptr); -} - -/* - * Setup the DID(Domain Identifier) field (Bit 64~79) of scalable mode - * PASID entry. - */ -static inline void -pasid_set_domain_id(struct pasid_entry *pe, u64 value) -{ - pasid_set_bits(&pe->val[1], GENMASK_ULL(15, 0), value); -} - -/* - * Get domain ID value of a scalable mode PASID entry. - */ -static inline u16 -pasid_get_domain_id(struct pasid_entry *pe) -{ - return (u16)(READ_ONCE(pe->val[1]) & GENMASK_ULL(15, 0)); -} - -/* - * Setup the SLPTPTR(Second Level Page Table Pointer) field (Bit 12~63) - * of a scalable mode PASID entry. - */ -static inline void -pasid_set_slptr(struct pasid_entry *pe, u64 value) -{ - pasid_set_bits(&pe->val[0], VTD_PAGE_MASK, value); -} - -/* - * Setup the AW(Address Width) field (Bit 2~4) of a scalable mode PASID - * entry. - */ -static inline void -pasid_set_address_width(struct pasid_entry *pe, u64 value) -{ - pasid_set_bits(&pe->val[0], GENMASK_ULL(4, 2), value << 2); -} - -/* - * Setup the PGTT(PASID Granular Translation Type) field (Bit 6~8) - * of a scalable mode PASID entry. - */ -static inline void -pasid_set_translation_type(struct pasid_entry *pe, u64 value) -{ - pasid_set_bits(&pe->val[0], GENMASK_ULL(8, 6), value << 6); -} - -/* - * Enable fault processing by clearing the FPD(Fault Processing - * Disable) field (Bit 1) of a scalable mode PASID entry. - */ -static inline void pasid_set_fault_enable(struct pasid_entry *pe) -{ - pasid_set_bits(&pe->val[0], 1 << 1, 0); -} - -/* - * Enable second level A/D bits by setting the SLADE (Second Level - * Access Dirty Enable) field (Bit 9) of a scalable mode PASID - * entry. 
- */ -static inline void pasid_set_ssade(struct pasid_entry *pe) -{ - pasid_set_bits(&pe->val[0], 1 << 9, 1 << 9); -} - -/* - * Disable second level A/D bits by clearing the SLADE (Second Level - * Access Dirty Enable) field (Bit 9) of a scalable mode PASID - * entry. - */ -static inline void pasid_clear_ssade(struct pasid_entry *pe) -{ - pasid_set_bits(&pe->val[0], 1 << 9, 0); -} - -/* - * Checks if second level A/D bits specifically the SLADE (Second Level - * Access Dirty Enable) field (Bit 9) of a scalable mode PASID - * entry is set. - */ -static inline bool pasid_get_ssade(struct pasid_entry *pe) -{ - return pasid_get_bits(&pe->val[0]) & (1 << 9); -} - -/* - * Setup the SRE(Supervisor Request Enable) field (Bit 128) of a - * scalable mode PASID entry. - */ -static inline void pasid_set_sre(struct pasid_entry *pe) -{ - pasid_set_bits(&pe->val[2], 1 << 0, 1); -} - -/* - * Setup the WPE(Write Protect Enable) field (Bit 132) of a - * scalable mode PASID entry. - */ -static inline void pasid_set_wpe(struct pasid_entry *pe) -{ - pasid_set_bits(&pe->val[2], 1 << 4, 1 << 4); -} - -/* - * Setup the P(Present) field (Bit 0) of a scalable mode PASID - * entry. - */ -static inline void pasid_set_present(struct pasid_entry *pe) -{ - pasid_set_bits(&pe->val[0], 1 << 0, 1); -} - -/* - * Setup Page Walk Snoop bit (Bit 87) of a scalable mode PASID - * entry. - */ -static inline void pasid_set_page_snoop(struct pasid_entry *pe, bool value) -{ - pasid_set_bits(&pe->val[1], 1 << 23, value << 23); -} - -/* - * Setup No Execute Enable bit (Bit 133) of a scalable mode PASID - * entry. It is required when XD bit of the first level page table - * entry is about to be set. - */ -static inline void pasid_set_nxe(struct pasid_entry *pe) -{ - pasid_set_bits(&pe->val[2], 1 << 5, 1 << 5); -} - -/* - * Setup the Page Snoop (PGSNP) field (Bit 88) of a scalable mode - * PASID entry. 
- */ -static inline void -pasid_set_pgsnp(struct pasid_entry *pe) -{ - pasid_set_bits(&pe->val[1], 1ULL << 24, 1ULL << 24); -} - -/* - * Setup the First Level Page table Pointer field (Bit 140~191) - * of a scalable mode PASID entry. - */ -static inline void -pasid_set_flptr(struct pasid_entry *pe, u64 value) -{ - pasid_set_bits(&pe->val[2], VTD_PAGE_MASK, value); -} - -/* - * Setup the First Level Paging Mode field (Bit 130~131) of a - * scalable mode PASID entry. - */ -static inline void -pasid_set_flpm(struct pasid_entry *pe, u64 value) -{ - pasid_set_bits(&pe->val[2], GENMASK_ULL(3, 2), value << 2); -} - -/* - * Setup the Extended Access Flag Enable (EAFE) field (Bit 135) - * of a scalable mode PASID entry. - */ -static inline void pasid_set_eafe(struct pasid_entry *pe) -{ - pasid_set_bits(&pe->val[2], 1 << 7, 1 << 7); -} - static void pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu, u16 did, u32 pasid) @@ -613,9 +346,9 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu, * Skip top levels of page tables for iommu which has less agaw * than default. Unnecessary for PT mode. */ -static inline int iommu_skip_agaw(struct dmar_domain *domain, - struct intel_iommu *iommu, - struct dma_pte **pgd) +static int iommu_skip_agaw(struct dmar_domain *domain, + struct intel_iommu *iommu, + struct dma_pte **pgd) { int agaw; @@ -767,7 +500,6 @@ int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu, * Set up the scalable mode pasid entry for passthrough translation type. 
*/ int intel_pasid_setup_pass_through(struct intel_iommu *iommu, - struct dmar_domain *domain, struct device *dev, u32 pasid) { u16 did = FLPT_DEFAULT_DID; diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h index dd37611175cc1b9e4009aad7d0c09522147128eb..8d40d4c66e3198a7ce90c83168a3f86491d79f71 100644 --- a/drivers/iommu/intel/pasid.h +++ b/drivers/iommu/intel/pasid.h @@ -22,16 +22,6 @@ #define is_pasid_enabled(entry) (((entry)->lo >> 3) & 0x1) #define get_pasid_dir_size(entry) (1 << ((((entry)->lo >> 9) & 0x7) + 7)) -/* Virtual command interface for enlightened pasid management. */ -#define VCMD_CMD_ALLOC 0x1 -#define VCMD_CMD_FREE 0x2 -#define VCMD_VRSP_IP 0x1 -#define VCMD_VRSP_SC(e) (((e) & 0xff) >> 1) -#define VCMD_VRSP_SC_SUCCESS 0 -#define VCMD_VRSP_SC_NO_PASID_AVAIL 16 -#define VCMD_VRSP_SC_INVALID_PASID 16 -#define VCMD_VRSP_RESULT_PASID(e) (((e) >> 16) & 0xfffff) -#define VCMD_CMD_OPERAND(e) ((e) << 16) /* * Domain ID reserved for pasid entries programmed for first-level * only and pass-through transfer modes. 
@@ -96,6 +86,216 @@ static inline u16 pasid_pte_get_pgtt(struct pasid_entry *pte) return (u16)((READ_ONCE(pte->val[0]) >> 6) & 0x7); } +static inline void pasid_clear_entry(struct pasid_entry *pe) +{ + WRITE_ONCE(pe->val[0], 0); + WRITE_ONCE(pe->val[1], 0); + WRITE_ONCE(pe->val[2], 0); + WRITE_ONCE(pe->val[3], 0); + WRITE_ONCE(pe->val[4], 0); + WRITE_ONCE(pe->val[5], 0); + WRITE_ONCE(pe->val[6], 0); + WRITE_ONCE(pe->val[7], 0); +} + +static inline void pasid_clear_entry_with_fpd(struct pasid_entry *pe) +{ + WRITE_ONCE(pe->val[0], PASID_PTE_FPD); + WRITE_ONCE(pe->val[1], 0); + WRITE_ONCE(pe->val[2], 0); + WRITE_ONCE(pe->val[3], 0); + WRITE_ONCE(pe->val[4], 0); + WRITE_ONCE(pe->val[5], 0); + WRITE_ONCE(pe->val[6], 0); + WRITE_ONCE(pe->val[7], 0); +} + +static inline void pasid_set_bits(u64 *ptr, u64 mask, u64 bits) +{ + u64 old; + + old = READ_ONCE(*ptr); + WRITE_ONCE(*ptr, (old & ~mask) | bits); +} + +static inline u64 pasid_get_bits(u64 *ptr) +{ + return READ_ONCE(*ptr); +} + +/* + * Setup the DID(Domain Identifier) field (Bit 64~79) of scalable mode + * PASID entry. + */ +static inline void +pasid_set_domain_id(struct pasid_entry *pe, u64 value) +{ + pasid_set_bits(&pe->val[1], GENMASK_ULL(15, 0), value); +} + +/* + * Get domain ID value of a scalable mode PASID entry. + */ +static inline u16 +pasid_get_domain_id(struct pasid_entry *pe) +{ + return (u16)(READ_ONCE(pe->val[1]) & GENMASK_ULL(15, 0)); +} + +/* + * Setup the SLPTPTR(Second Level Page Table Pointer) field (Bit 12~63) + * of a scalable mode PASID entry. + */ +static inline void +pasid_set_slptr(struct pasid_entry *pe, u64 value) +{ + pasid_set_bits(&pe->val[0], VTD_PAGE_MASK, value); +} + +/* + * Setup the AW(Address Width) field (Bit 2~4) of a scalable mode PASID + * entry. 
+ */ +static inline void +pasid_set_address_width(struct pasid_entry *pe, u64 value) +{ + pasid_set_bits(&pe->val[0], GENMASK_ULL(4, 2), value << 2); +} + +/* + * Setup the PGTT(PASID Granular Translation Type) field (Bit 6~8) + * of a scalable mode PASID entry. + */ +static inline void +pasid_set_translation_type(struct pasid_entry *pe, u64 value) +{ + pasid_set_bits(&pe->val[0], GENMASK_ULL(8, 6), value << 6); +} + +/* + * Enable fault processing by clearing the FPD(Fault Processing + * Disable) field (Bit 1) of a scalable mode PASID entry. + */ +static inline void pasid_set_fault_enable(struct pasid_entry *pe) +{ + pasid_set_bits(&pe->val[0], 1 << 1, 0); +} + +/* + * Enable second level A/D bits by setting the SLADE (Second Level + * Access Dirty Enable) field (Bit 9) of a scalable mode PASID + * entry. + */ +static inline void pasid_set_ssade(struct pasid_entry *pe) +{ + pasid_set_bits(&pe->val[0], 1 << 9, 1 << 9); +} + +/* + * Disable second level A/D bits by clearing the SLADE (Second Level + * Access Dirty Enable) field (Bit 9) of a scalable mode PASID + * entry. + */ +static inline void pasid_clear_ssade(struct pasid_entry *pe) +{ + pasid_set_bits(&pe->val[0], 1 << 9, 0); +} + +/* + * Checks if second level A/D bits specifically the SLADE (Second Level + * Access Dirty Enable) field (Bit 9) of a scalable mode PASID + * entry is set. + */ +static inline bool pasid_get_ssade(struct pasid_entry *pe) +{ + return pasid_get_bits(&pe->val[0]) & (1 << 9); +} + +/* + * Setup the SRE(Supervisor Request Enable) field (Bit 128) of a + * scalable mode PASID entry. + */ +static inline void pasid_set_sre(struct pasid_entry *pe) +{ + pasid_set_bits(&pe->val[2], 1 << 0, 1); +} + +/* + * Setup the WPE(Write Protect Enable) field (Bit 132) of a + * scalable mode PASID entry. + */ +static inline void pasid_set_wpe(struct pasid_entry *pe) +{ + pasid_set_bits(&pe->val[2], 1 << 4, 1 << 4); +} + +/* + * Setup the P(Present) field (Bit 0) of a scalable mode PASID + * entry. 
+ */ +static inline void pasid_set_present(struct pasid_entry *pe) +{ + pasid_set_bits(&pe->val[0], 1 << 0, 1); +} + +/* + * Setup Page Walk Snoop bit (Bit 87) of a scalable mode PASID + * entry. + */ +static inline void pasid_set_page_snoop(struct pasid_entry *pe, bool value) +{ + pasid_set_bits(&pe->val[1], 1 << 23, value << 23); +} + +/* + * Setup No Execute Enable bit (Bit 133) of a scalable mode PASID + * entry. It is required when XD bit of the first level page table + * entry is about to be set. + */ +static inline void pasid_set_nxe(struct pasid_entry *pe) +{ + pasid_set_bits(&pe->val[2], 1 << 5, 1 << 5); +} + +/* + * Setup the Page Snoop (PGSNP) field (Bit 88) of a scalable mode + * PASID entry. + */ +static inline void +pasid_set_pgsnp(struct pasid_entry *pe) +{ + pasid_set_bits(&pe->val[1], 1ULL << 24, 1ULL << 24); +} + +/* + * Setup the First Level Page table Pointer field (Bit 140~191) + * of a scalable mode PASID entry. + */ +static inline void +pasid_set_flptr(struct pasid_entry *pe, u64 value) +{ + pasid_set_bits(&pe->val[2], VTD_PAGE_MASK, value); +} + +/* + * Setup the First Level Paging Mode field (Bit 130~131) of a + * scalable mode PASID entry. + */ +static inline void +pasid_set_flpm(struct pasid_entry *pe, u64 value) +{ + pasid_set_bits(&pe->val[2], GENMASK_ULL(3, 2), value << 2); +} + +/* + * Setup the Extended Access Flag Enable (EAFE) field (Bit 135) + * of a scalable mode PASID entry. 
+ */ +static inline void pasid_set_eafe(struct pasid_entry *pe) +{ + pasid_set_bits(&pe->val[2], 1 << 7, 1 << 7); +} + extern unsigned int intel_pasid_max_id; int intel_pasid_alloc_table(struct device *dev); void intel_pasid_free_table(struct device *dev); @@ -111,15 +311,12 @@ int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu, struct device *dev, u32 pasid, bool enabled); int intel_pasid_setup_pass_through(struct intel_iommu *iommu, - struct dmar_domain *domain, struct device *dev, u32 pasid); int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev, u32 pasid, struct dmar_domain *domain); void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev, u32 pasid, bool fault_ignore); -int vcmd_alloc_pasid(struct intel_iommu *iommu, u32 *pasid); -void vcmd_free_pasid(struct intel_iommu *iommu, u32 pasid); void intel_pasid_setup_page_snoop_control(struct intel_iommu *iommu, struct device *dev, u32 pasid); #endif /* __INTEL_PASID_H */ diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index ac12f76c1212ac5f8f3a835f9afc1a0f6737af14..40edd282903fbe7c804512819aa95c3a1ae9d43e 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -316,21 +316,22 @@ out: } static int intel_svm_bind_mm(struct intel_iommu *iommu, struct device *dev, - struct mm_struct *mm) + struct iommu_domain *domain, ioasid_t pasid) { struct device_domain_info *info = dev_iommu_priv_get(dev); + struct mm_struct *mm = domain->mm; struct intel_svm_dev *sdev; struct intel_svm *svm; unsigned long sflags; int ret = 0; - svm = pasid_private_find(mm->pasid); + svm = pasid_private_find(pasid); if (!svm) { svm = kzalloc(sizeof(*svm), GFP_KERNEL); if (!svm) return -ENOMEM; - svm->pasid = mm->pasid; + svm->pasid = pasid; svm->mm = mm; INIT_LIST_HEAD_RCU(&svm->devs); @@ -368,7 +369,7 @@ static int intel_svm_bind_mm(struct intel_iommu *iommu, struct device *dev, /* Setup the pasid table: */ sflags = cpu_feature_enabled(X86_FEATURE_LA57) 
? PASID_FLAG_FL5LP : 0; - ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, mm->pasid, + ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, pasid, FLPT_DEFAULT_DID, sflags); if (ret) goto free_sdev; @@ -382,7 +383,7 @@ free_sdev: free_svm: if (list_empty(&svm->devs)) { mmu_notifier_unregister(&svm->notifier, mm); - pasid_private_remove(mm->pasid); + pasid_private_remove(pasid); kfree(svm); } @@ -392,14 +393,9 @@ free_svm: void intel_svm_remove_dev_pasid(struct device *dev, u32 pasid) { struct intel_svm_dev *sdev; - struct intel_iommu *iommu; struct intel_svm *svm; struct mm_struct *mm; - iommu = device_to_iommu(dev, NULL, NULL); - if (!iommu) - return; - if (pasid_to_svm_sdev(dev, pasid, &svm, &sdev)) return; mm = svm->mm; @@ -750,25 +746,16 @@ int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt, struct iommu_page_response *msg) { + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; + u8 bus = info->bus, devfn = info->devfn; struct iommu_fault_page_request *prm; - struct intel_iommu *iommu; bool private_present; bool pasid_present; bool last_page; - u8 bus, devfn; int ret = 0; u16 sid; - if (!dev || !dev_is_pci(dev)) - return -ENODEV; - - iommu = device_to_iommu(dev, &bus, &devfn); - if (!iommu) - return -ENODEV; - - if (!msg || !evt) - return -EINVAL; - prm = &evt->fault.prm; sid = PCI_DEVID(bus, devfn); pasid_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID; @@ -822,9 +809,8 @@ static int intel_svm_set_dev_pasid(struct iommu_domain *domain, { struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu = info->iommu; - struct mm_struct *mm = domain->mm; - return intel_svm_bind_mm(iommu, dev, mm); + return intel_svm_bind_mm(iommu, dev, domain, pasid); } static void intel_svm_domain_free(struct iommu_domain *domain) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 
72dcdd468cf30d6ec32eaf0795463549076309b0..f7828a7aad410d4406de3d85a97cd419bc29bd0e 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -188,20 +188,28 @@ static dma_addr_t __arm_lpae_dma_addr(void *pages) } static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp, - struct io_pgtable_cfg *cfg) + struct io_pgtable_cfg *cfg, + void *cookie) { struct device *dev = cfg->iommu_dev; int order = get_order(size); - struct page *p; dma_addr_t dma; void *pages; VM_BUG_ON((gfp & __GFP_HIGHMEM)); - p = alloc_pages_node(dev_to_node(dev), gfp | __GFP_ZERO, order); - if (!p) + + if (cfg->alloc) { + pages = cfg->alloc(cookie, size, gfp); + } else { + struct page *p; + + p = alloc_pages_node(dev_to_node(dev), gfp | __GFP_ZERO, order); + pages = p ? page_address(p) : NULL; + } + + if (!pages) return NULL; - pages = page_address(p); if (!cfg->coherent_walk) { dma = dma_map_single(dev, pages, size, DMA_TO_DEVICE); if (dma_mapping_error(dev, dma)) @@ -220,18 +228,28 @@ static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp, out_unmap: dev_err(dev, "Cannot accommodate DMA translation for IOMMU page tables\n"); dma_unmap_single(dev, dma, size, DMA_TO_DEVICE); + out_free: - __free_pages(p, order); + if (cfg->free) + cfg->free(cookie, pages, size); + else + free_pages((unsigned long)pages, order); + return NULL; } static void __arm_lpae_free_pages(void *pages, size_t size, - struct io_pgtable_cfg *cfg) + struct io_pgtable_cfg *cfg, + void *cookie) { if (!cfg->coherent_walk) dma_unmap_single(cfg->iommu_dev, __arm_lpae_dma_addr(pages), size, DMA_TO_DEVICE); - free_pages((unsigned long)pages, get_order(size)); + + if (cfg->free) + cfg->free(cookie, pages, size); + else + free_pages((unsigned long)pages, get_order(size)); } static void __arm_lpae_sync_pte(arm_lpae_iopte *ptep, int num_entries, @@ -373,13 +391,13 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova, /* Grab a pointer to the next level */ pte = READ_ONCE(*ptep); 
if (!pte) { - cptep = __arm_lpae_alloc_pages(tblsz, gfp, cfg); + cptep = __arm_lpae_alloc_pages(tblsz, gfp, cfg, data->iop.cookie); if (!cptep) return -ENOMEM; pte = arm_lpae_install_table(cptep, ptep, 0, data); if (pte) - __arm_lpae_free_pages(cptep, tblsz, cfg); + __arm_lpae_free_pages(cptep, tblsz, cfg, data->iop.cookie); } else if (!cfg->coherent_walk && !(pte & ARM_LPAE_PTE_SW_SYNC)) { __arm_lpae_sync_pte(ptep, 1, cfg); } @@ -524,7 +542,7 @@ static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl, __arm_lpae_free_pgtable(data, lvl + 1, iopte_deref(pte, data)); } - __arm_lpae_free_pages(start, table_size, &data->iop.cfg); + __arm_lpae_free_pages(start, table_size, &data->iop.cfg, data->iop.cookie); } static void arm_lpae_free_pgtable(struct io_pgtable *iop) @@ -552,7 +570,7 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data, if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS)) return 0; - tablep = __arm_lpae_alloc_pages(tablesz, GFP_ATOMIC, cfg); + tablep = __arm_lpae_alloc_pages(tablesz, GFP_ATOMIC, cfg, data->iop.cookie); if (!tablep) return 0; /* Bytes unmapped */ @@ -575,7 +593,7 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data, pte = arm_lpae_install_table(tablep, ptep, blk_pte, data); if (pte != blk_pte) { - __arm_lpae_free_pages(tablep, tablesz, cfg); + __arm_lpae_free_pages(tablep, tablesz, cfg, data->iop.cookie); /* * We may race against someone unmapping another part of this * block, but anything else is invalid. 
We can't misinterpret @@ -882,7 +900,7 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) /* Looking good; allocate a pgd */ data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data), - GFP_KERNEL, cfg); + GFP_KERNEL, cfg, cookie); if (!data->pgd) goto out_free_data; @@ -984,7 +1002,7 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) /* Allocate pgd pages */ data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data), - GFP_KERNEL, cfg); + GFP_KERNEL, cfg, cookie); if (!data->pgd) goto out_free_data; @@ -1059,7 +1077,7 @@ arm_mali_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV)); data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data), GFP_KERNEL, - cfg); + cfg, cookie); if (!data->pgd) goto out_free_data; @@ -1080,26 +1098,31 @@ out_free_data: } struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns = { + .caps = IO_PGTABLE_CAP_CUSTOM_ALLOCATOR, .alloc = arm_64_lpae_alloc_pgtable_s1, .free = arm_lpae_free_pgtable, }; struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s2_init_fns = { + .caps = IO_PGTABLE_CAP_CUSTOM_ALLOCATOR, .alloc = arm_64_lpae_alloc_pgtable_s2, .free = arm_lpae_free_pgtable, }; struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s1_init_fns = { + .caps = IO_PGTABLE_CAP_CUSTOM_ALLOCATOR, .alloc = arm_32_lpae_alloc_pgtable_s1, .free = arm_lpae_free_pgtable, }; struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s2_init_fns = { + .caps = IO_PGTABLE_CAP_CUSTOM_ALLOCATOR, .alloc = arm_32_lpae_alloc_pgtable_s2, .free = arm_lpae_free_pgtable, }; struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns = { + .caps = IO_PGTABLE_CAP_CUSTOM_ALLOCATOR, .alloc = arm_mali_lpae_alloc_pgtable, .free = arm_lpae_free_pgtable, }; diff --git a/drivers/iommu/io-pgtable.c b/drivers/iommu/io-pgtable.c index b843fcd365d286668273667401b331a27fc04d23..8841c1487f00481f92759e499ad85a70423b7428 100644 --- a/drivers/iommu/io-pgtable.c +++ 
b/drivers/iommu/io-pgtable.c @@ -34,6 +34,26 @@ io_pgtable_init_table[IO_PGTABLE_NUM_FMTS] = { #endif }; +static int check_custom_allocator(enum io_pgtable_fmt fmt, + struct io_pgtable_cfg *cfg) +{ + /* No custom allocator, no need to check the format. */ + if (!cfg->alloc && !cfg->free) + return 0; + + /* When passing a custom allocator, both the alloc and free + * functions should be provided. + */ + if (!cfg->alloc || !cfg->free) + return -EINVAL; + + /* Make sure the format supports custom allocators. */ + if (io_pgtable_init_table[fmt]->caps & IO_PGTABLE_CAP_CUSTOM_ALLOCATOR) + return 0; + + return -EINVAL; +} + struct io_pgtable_ops *alloc_io_pgtable_ops(enum io_pgtable_fmt fmt, struct io_pgtable_cfg *cfg, void *cookie) @@ -44,6 +64,9 @@ struct io_pgtable_ops *alloc_io_pgtable_ops(enum io_pgtable_fmt fmt, if (fmt >= IO_PGTABLE_NUM_FMTS) return NULL; + if (check_custom_allocator(fmt, cfg)) + return NULL; + fns = io_pgtable_init_table[fmt]; if (!fns) return NULL; diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c index b78671a8a9143fe0b961c69d0ad81fa3224e5a83..c3fc9201d0be97e59395750cda0fc29940c0b844 100644 --- a/drivers/iommu/iommu-sva.c +++ b/drivers/iommu/iommu-sva.c @@ -12,32 +12,42 @@ static DEFINE_MUTEX(iommu_sva_lock); /* Allocate a PASID for the mm within range (inclusive) */ -static int iommu_sva_alloc_pasid(struct mm_struct *mm, struct device *dev) +static struct iommu_mm_data *iommu_alloc_mm_data(struct mm_struct *mm, struct device *dev) { + struct iommu_mm_data *iommu_mm; ioasid_t pasid; - int ret = 0; + + lockdep_assert_held(&iommu_sva_lock); if (!arch_pgtable_dma_compat(mm)) - return -EBUSY; + return ERR_PTR(-EBUSY); - mutex_lock(&iommu_sva_lock); + iommu_mm = mm->iommu_mm; /* Is a PASID already associated with this mm? 
*/ - if (mm_valid_pasid(mm)) { - if (mm->pasid >= dev->iommu->max_pasids) - ret = -EOVERFLOW; - goto out; + if (iommu_mm) { + if (iommu_mm->pasid >= dev->iommu->max_pasids) + return ERR_PTR(-EOVERFLOW); + return iommu_mm; } + iommu_mm = kzalloc(sizeof(struct iommu_mm_data), GFP_KERNEL); + if (!iommu_mm) + return ERR_PTR(-ENOMEM); + pasid = iommu_alloc_global_pasid(dev); if (pasid == IOMMU_PASID_INVALID) { - ret = -ENOSPC; - goto out; + kfree(iommu_mm); + return ERR_PTR(-ENOSPC); } - mm->pasid = pasid; - ret = 0; -out: - mutex_unlock(&iommu_sva_lock); - return ret; + iommu_mm->pasid = pasid; + INIT_LIST_HEAD(&iommu_mm->sva_domains); + /* + * Make sure the write to mm->iommu_mm is not reordered in front of + * initialization to iommu_mm fields. If it does, readers may see a + * valid iommu_mm with uninitialized values. + */ + smp_store_release(&mm->iommu_mm, iommu_mm); + return iommu_mm; } /** @@ -58,57 +68,60 @@ out: */ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm) { + struct iommu_mm_data *iommu_mm; struct iommu_domain *domain; struct iommu_sva *handle; int ret; + mutex_lock(&iommu_sva_lock); + /* Allocate mm->pasid if necessary. */ - ret = iommu_sva_alloc_pasid(mm, dev); - if (ret) - return ERR_PTR(ret); + iommu_mm = iommu_alloc_mm_data(mm, dev); + if (IS_ERR(iommu_mm)) { + ret = PTR_ERR(iommu_mm); + goto out_unlock; + } handle = kzalloc(sizeof(*handle), GFP_KERNEL); - if (!handle) - return ERR_PTR(-ENOMEM); - - mutex_lock(&iommu_sva_lock); - /* Search for an existing domain. */ - domain = iommu_get_domain_for_dev_pasid(dev, mm->pasid, - IOMMU_DOMAIN_SVA); - if (IS_ERR(domain)) { - ret = PTR_ERR(domain); + if (!handle) { + ret = -ENOMEM; goto out_unlock; } - if (domain) { - domain->users++; - goto out; + /* Search for an existing domain. 
*/ + list_for_each_entry(domain, &mm->iommu_mm->sva_domains, next) { + ret = iommu_attach_device_pasid(domain, dev, iommu_mm->pasid); + if (!ret) { + domain->users++; + goto out; + } } /* Allocate a new domain and set it on device pasid. */ domain = iommu_sva_domain_alloc(dev, mm); if (!domain) { ret = -ENOMEM; - goto out_unlock; + goto out_free_handle; } - ret = iommu_attach_device_pasid(domain, dev, mm->pasid); + ret = iommu_attach_device_pasid(domain, dev, iommu_mm->pasid); if (ret) goto out_free_domain; domain->users = 1; + list_add(&domain->next, &mm->iommu_mm->sva_domains); + out: mutex_unlock(&iommu_sva_lock); handle->dev = dev; handle->domain = domain; - return handle; out_free_domain: iommu_domain_free(domain); +out_free_handle: + kfree(handle); out_unlock: mutex_unlock(&iommu_sva_lock); - kfree(handle); - return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(iommu_sva_bind_device); @@ -124,12 +137,13 @@ EXPORT_SYMBOL_GPL(iommu_sva_bind_device); void iommu_sva_unbind_device(struct iommu_sva *handle) { struct iommu_domain *domain = handle->domain; - ioasid_t pasid = domain->mm->pasid; + struct iommu_mm_data *iommu_mm = domain->mm->iommu_mm; struct device *dev = handle->dev; mutex_lock(&iommu_sva_lock); + iommu_detach_device_pasid(domain, dev, iommu_mm->pasid); if (--domain->users == 0) { - iommu_detach_device_pasid(domain, dev, pasid); + list_del(&domain->next); iommu_domain_free(domain); } mutex_unlock(&iommu_sva_lock); @@ -141,7 +155,7 @@ u32 iommu_sva_get_pasid(struct iommu_sva *handle) { struct iommu_domain *domain = handle->domain; - return domain->mm->pasid; + return mm_get_enqcmd_pasid(domain->mm); } EXPORT_SYMBOL_GPL(iommu_sva_get_pasid); @@ -205,8 +219,11 @@ out_put_mm: void mm_pasid_drop(struct mm_struct *mm) { - if (likely(!mm_valid_pasid(mm))) + struct iommu_mm_data *iommu_mm = mm->iommu_mm; + + if (!iommu_mm) return; - iommu_free_global_pasid(mm->pasid); + iommu_free_global_pasid(iommu_mm->pasid); + kfree(iommu_mm); } diff --git a/drivers/iommu/iommu.c 
b/drivers/iommu/iommu.c index 33e2a9b5d339e4f82a63e4c8fcbc2e8103af93b5..68e648b55767060204a8f42d1927c09ebacad39a 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -148,7 +148,7 @@ struct iommu_group_attribute iommu_group_attr_##_name = \ static LIST_HEAD(iommu_device_list); static DEFINE_SPINLOCK(iommu_device_lock); -static struct bus_type * const iommu_buses[] = { +static const struct bus_type * const iommu_buses[] = { &platform_bus_type, #ifdef CONFIG_PCI &pci_bus_type, @@ -257,13 +257,6 @@ int iommu_device_register(struct iommu_device *iommu, /* We need to be able to take module references appropriately */ if (WARN_ON(is_module_address((unsigned long)ops) && !ops->owner)) return -EINVAL; - /* - * Temporarily enforce global restriction to a single driver. This was - * already the de-facto behaviour, since any possible combination of - * existing drivers would compete for at least the PCI or platform bus. - */ - if (iommu_buses[0]->iommu_ops && iommu_buses[0]->iommu_ops != ops) - return -EBUSY; iommu->ops = ops; if (hwdev) @@ -273,10 +266,8 @@ int iommu_device_register(struct iommu_device *iommu, list_add_tail(&iommu->list, &iommu_device_list); spin_unlock(&iommu_device_lock); - for (int i = 0; i < ARRAY_SIZE(iommu_buses) && !err; i++) { - iommu_buses[i]->iommu_ops = ops; + for (int i = 0; i < ARRAY_SIZE(iommu_buses) && !err; i++) err = bus_iommu_probe(iommu_buses[i]); - } if (err) iommu_device_unregister(iommu); return err; @@ -329,7 +320,6 @@ int iommu_device_register_bus(struct iommu_device *iommu, list_add_tail(&iommu->list, &iommu_device_list); spin_unlock(&iommu_device_lock); - bus->iommu_ops = ops; err = bus_iommu_probe(bus); if (err) { iommu_device_unregister_bus(iommu, bus, nb); @@ -344,6 +334,8 @@ static struct dev_iommu *dev_iommu_get(struct device *dev) { struct dev_iommu *param = dev->iommu; + lockdep_assert_held(&iommu_probe_device_lock); + if (param) return param; @@ -368,6 +360,15 @@ static void dev_iommu_free(struct device *dev) 
kfree(param); } +/* + * Internal equivalent of device_iommu_mapped() for when we care that a device + * actually has API ops, and don't want false positives from VFIO-only groups. + */ +static bool dev_has_iommu(struct device *dev) +{ + return dev->iommu && dev->iommu->iommu_dev; +} + static u32 dev_iommu_get_max_pasids(struct device *dev) { u32 max_pasids = 0, bits = 0; @@ -386,6 +387,15 @@ static u32 dev_iommu_get_max_pasids(struct device *dev) return min_t(u32, max_pasids, dev->iommu->iommu_dev->max_pasids); } +void dev_iommu_priv_set(struct device *dev, void *priv) +{ + /* FSL_PAMU does something weird */ + if (!IS_ENABLED(CONFIG_FSL_PAMU)) + lockdep_assert_held(&iommu_probe_device_lock); + dev->iommu->priv = priv; +} +EXPORT_SYMBOL_GPL(dev_iommu_priv_set); + /* * Init the dev->iommu and dev->iommu_group in the struct device and get the * driver probed @@ -489,11 +499,26 @@ DEFINE_MUTEX(iommu_probe_device_lock); static int __iommu_probe_device(struct device *dev, struct list_head *group_list) { - const struct iommu_ops *ops = dev->bus->iommu_ops; + const struct iommu_ops *ops; + struct iommu_fwspec *fwspec; struct iommu_group *group; struct group_device *gdev; int ret; + /* + * For FDT-based systems and ACPI IORT/VIOT, drivers register IOMMU + * instances with non-NULL fwnodes, and client devices should have been + * identified with a fwspec by this point. Otherwise, we can currently + * assume that only one of Intel, AMD, s390, PAMU or legacy SMMUv2 can + * be present, and that any of their registered instances has suitable + * ops for probing, and thus cheekily co-opt the same mechanism. 
+ */ + fwspec = dev_iommu_fwspec_get(dev); + if (fwspec && fwspec->ops) + ops = fwspec->ops; + else + ops = iommu_ops_from_fwnode(NULL); + if (!ops) return -ENODEV; /* @@ -618,7 +643,7 @@ static void __iommu_group_remove_device(struct device *dev) list_del(&device->list); __iommu_group_free_device(group, device); - if (dev->iommu && dev->iommu->iommu_dev) + if (dev_has_iommu(dev)) iommu_deinit_device(dev); else dev->iommu_group = NULL; @@ -817,7 +842,7 @@ int iommu_get_group_resv_regions(struct iommu_group *group, * Non-API groups still expose reserved_regions in sysfs, * so filter out calls that get here that way. */ - if (!device->dev->iommu) + if (!dev_has_iommu(device->dev)) break; INIT_LIST_HEAD(&dev_resv_regions); @@ -1223,6 +1248,12 @@ void iommu_group_remove_device(struct device *dev) } EXPORT_SYMBOL_GPL(iommu_group_remove_device); +static struct device *iommu_group_first_dev(struct iommu_group *group) +{ + lockdep_assert_held(&group->mutex); + return list_first_entry(&group->devices, struct group_device, list)->dev; +} + /** * iommu_group_for_each_dev - iterate over each device in the group * @group: the group @@ -1750,23 +1781,6 @@ __iommu_group_alloc_default_domain(struct iommu_group *group, int req_type) return __iommu_group_domain_alloc(group, req_type); } -/* - * Returns the iommu_ops for the devices in an iommu group. - * - * It is assumed that all devices in an iommu group are managed by a single - * IOMMU unit. Therefore, this returns the dev_iommu_ops of the first device - * in the group. - */ -static const struct iommu_ops *group_iommu_ops(struct iommu_group *group) -{ - struct group_device *device = - list_first_entry(&group->devices, struct group_device, list); - - lockdep_assert_held(&group->mutex); - - return dev_iommu_ops(device->dev); -} - /* * req_type of 0 means "auto" which means to select a domain based on * iommu_def_domain_type or what the driver actually supports. 
@@ -1774,7 +1788,7 @@ static const struct iommu_ops *group_iommu_ops(struct iommu_group *group) static struct iommu_domain * iommu_group_alloc_default_domain(struct iommu_group *group, int req_type) { - const struct iommu_ops *ops = group_iommu_ops(group); + const struct iommu_ops *ops = dev_iommu_ops(iommu_group_first_dev(group)); struct iommu_domain *dom; lockdep_assert_held(&group->mutex); @@ -1854,7 +1868,7 @@ static int iommu_bus_notifier(struct notifier_block *nb, static int iommu_get_def_domain_type(struct iommu_group *group, struct device *dev, int cur_type) { - const struct iommu_ops *ops = group_iommu_ops(group); + const struct iommu_ops *ops = dev_iommu_ops(dev); int type; if (!ops->def_domain_type) @@ -2003,9 +2017,28 @@ int bus_iommu_probe(const struct bus_type *bus) return 0; } +/** + * iommu_present() - make platform-specific assumptions about an IOMMU + * @bus: bus to check + * + * Do not use this function. You want device_iommu_mapped() instead. + * + * Return: true if some IOMMU is present and aware of devices on the given bus; + * in general it may not be the only IOMMU, and it may not have anything to do + * with whatever device you are ultimately interested in. 
+ */ bool iommu_present(const struct bus_type *bus) { - return bus->iommu_ops != NULL; + bool ret = false; + + for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) { + if (iommu_buses[i] == bus) { + spin_lock(&iommu_device_lock); + ret = !list_empty(&iommu_device_list); + spin_unlock(&iommu_device_lock); + } + } + return ret; } EXPORT_SYMBOL_GPL(iommu_present); @@ -2021,7 +2054,7 @@ bool device_iommu_capable(struct device *dev, enum iommu_cap cap) { const struct iommu_ops *ops; - if (!dev->iommu || !dev->iommu->iommu_dev) + if (!dev_has_iommu(dev)) return false; ops = dev_iommu_ops(dev); @@ -2107,6 +2140,7 @@ static struct iommu_domain *__iommu_domain_alloc(const struct iommu_ops *ops, return ERR_PTR(-ENOMEM); domain->type = type; + domain->owner = ops; /* * If not already set, assume all sizes by default; the driver * may override this later @@ -2132,21 +2166,37 @@ static struct iommu_domain *__iommu_domain_alloc(const struct iommu_ops *ops, static struct iommu_domain * __iommu_group_domain_alloc(struct iommu_group *group, unsigned int type) { - struct device *dev = - list_first_entry(&group->devices, struct group_device, list) - ->dev; + struct device *dev = iommu_group_first_dev(group); - return __iommu_domain_alloc(group_iommu_ops(group), dev, type); + return __iommu_domain_alloc(dev_iommu_ops(dev), dev, type); +} + +static int __iommu_domain_alloc_dev(struct device *dev, void *data) +{ + const struct iommu_ops **ops = data; + + if (!dev_has_iommu(dev)) + return 0; + + if (WARN_ONCE(*ops && *ops != dev_iommu_ops(dev), + "Multiple IOMMU drivers present for bus %s, which the public IOMMU API can't fully support yet. 
You will still need to disable one or more for this to work, sorry!\n", + dev_bus_name(dev))) + return -EBUSY; + + *ops = dev_iommu_ops(dev); + return 0; } struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus) { + const struct iommu_ops *ops = NULL; + int err = bus_for_each_dev(bus, NULL, &ops, __iommu_domain_alloc_dev); struct iommu_domain *domain; - if (bus == NULL || bus->iommu_ops == NULL) + if (err || !ops) return NULL; - domain = __iommu_domain_alloc(bus->iommu_ops, NULL, - IOMMU_DOMAIN_UNMANAGED); + + domain = __iommu_domain_alloc(ops, NULL, IOMMU_DOMAIN_UNMANAGED); if (IS_ERR(domain)) return NULL; return domain; @@ -2284,10 +2334,16 @@ struct iommu_domain *iommu_get_dma_domain(struct device *dev) static int __iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group) { + struct device *dev; + if (group->domain && group->domain != group->default_domain && group->domain != group->blocking_domain) return -EBUSY; + dev = iommu_group_first_dev(group); + if (!dev_has_iommu(dev) || dev_iommu_ops(dev) != domain->owner) + return -EINVAL; + return __iommu_group_set_domain(group, domain); } @@ -3004,8 +3060,8 @@ EXPORT_SYMBOL_GPL(iommu_fwspec_add_ids); */ int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat) { - if (dev->iommu && dev->iommu->iommu_dev) { - const struct iommu_ops *ops = dev->iommu->iommu_dev->ops; + if (dev_has_iommu(dev)) { + const struct iommu_ops *ops = dev_iommu_ops(dev); if (ops->dev_enable_feat) return ops->dev_enable_feat(dev, feat); @@ -3020,8 +3076,8 @@ EXPORT_SYMBOL_GPL(iommu_dev_enable_feature); */ int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat) { - if (dev->iommu && dev->iommu->iommu_dev) { - const struct iommu_ops *ops = dev->iommu->iommu_dev->ops; + if (dev_has_iommu(dev)) { + const struct iommu_ops *ops = dev_iommu_ops(dev); if (ops->dev_disable_feat) return ops->dev_disable_feat(dev, feat); @@ -3481,6 +3537,9 @@ int iommu_attach_device_pasid(struct 
iommu_domain *domain, if (!group) return -ENODEV; + if (!dev_has_iommu(dev) || dev_iommu_ops(dev) != domain->owner) + return -EINVAL; + mutex_lock(&group->mutex); curr = xa_cmpxchg(&group->pasid_array, pasid, NULL, domain, GFP_KERNEL); if (curr) { @@ -3569,6 +3628,7 @@ struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, domain->type = IOMMU_DOMAIN_SVA; mmgrab(mm); domain->mm = mm; + domain->owner = ops; domain->iopf_handler = iommu_sva_handle_iopf; domain->fault_data = mm; diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c index cbb5df0a6c32f835b50535a84bde3f44bfb4d6db..3f3f1fa1a0a946a43eb48ee324ab4979683bb566 100644 --- a/drivers/iommu/iommufd/hw_pagetable.c +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -135,6 +135,7 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, hwpt->domain = NULL; goto out_abort; } + hwpt->domain->owner = ops; } else { hwpt->domain = iommu_domain_alloc(idev->dev->bus); if (!hwpt->domain) { @@ -233,6 +234,7 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx, hwpt->domain = NULL; goto out_abort; } + hwpt->domain->owner = ops; if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED)) { rc = -EINVAL; @@ -371,3 +373,44 @@ int iommufd_hwpt_get_dirty_bitmap(struct iommufd_ucmd *ucmd) iommufd_put_object(ucmd->ictx, &hwpt_paging->common.obj); return rc; } + +int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd) +{ + struct iommu_hwpt_invalidate *cmd = ucmd->cmd; + struct iommu_user_data_array data_array = { + .type = cmd->data_type, + .uptr = u64_to_user_ptr(cmd->data_uptr), + .entry_len = cmd->entry_len, + .entry_num = cmd->entry_num, + }; + struct iommufd_hw_pagetable *hwpt; + u32 done_num = 0; + int rc; + + if (cmd->__reserved) { + rc = -EOPNOTSUPP; + goto out; + } + + if (cmd->entry_num && (!cmd->data_uptr || !cmd->entry_len)) { + rc = -EINVAL; + goto out; + } + + hwpt = iommufd_get_hwpt_nested(ucmd, cmd->hwpt_id); + if (IS_ERR(hwpt)) { + rc = PTR_ERR(hwpt); 
+ goto out; + } + + rc = hwpt->domain->ops->cache_invalidate_user(hwpt->domain, + &data_array); + done_num = data_array.entry_num; + + iommufd_put_object(ucmd->ictx, &hwpt->obj); +out: + cmd->entry_num = done_num; + if (iommufd_ucmd_respond(ucmd, sizeof(*cmd))) + return -EFAULT; + return rc; +} diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index abae041e256f7ed1a0a6fcc68a48087098effc6b..991f864d1f9bc175b9acc9b9c445d32ed837f878 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -328,6 +328,15 @@ iommufd_get_hwpt_paging(struct iommufd_ucmd *ucmd, u32 id) IOMMUFD_OBJ_HWPT_PAGING), struct iommufd_hwpt_paging, common.obj); } + +static inline struct iommufd_hw_pagetable * +iommufd_get_hwpt_nested(struct iommufd_ucmd *ucmd, u32 id) +{ + return container_of(iommufd_get_object(ucmd->ictx, id, + IOMMUFD_OBJ_HWPT_NESTED), + struct iommufd_hw_pagetable, obj); +} + int iommufd_hwpt_set_dirty_tracking(struct iommufd_ucmd *ucmd); int iommufd_hwpt_get_dirty_bitmap(struct iommufd_ucmd *ucmd); @@ -345,6 +354,7 @@ void iommufd_hwpt_paging_abort(struct iommufd_object *obj); void iommufd_hwpt_nested_destroy(struct iommufd_object *obj); void iommufd_hwpt_nested_abort(struct iommufd_object *obj); int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd); +int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd); static inline void iommufd_hw_pagetable_put(struct iommufd_ctx *ictx, struct iommufd_hw_pagetable *hwpt) diff --git a/drivers/iommu/iommufd/iommufd_test.h b/drivers/iommu/iommufd/iommufd_test.h index 7910fbe1962d78b9c8b65726fad12e75c0fd4a22..482d4059f5db6aed38ee8aa60f25b791f1e7556d 100644 --- a/drivers/iommu/iommufd/iommufd_test.h +++ b/drivers/iommu/iommufd/iommufd_test.h @@ -21,6 +21,7 @@ enum { IOMMU_TEST_OP_ACCESS_REPLACE_IOAS, IOMMU_TEST_OP_MOCK_DOMAIN_FLAGS, IOMMU_TEST_OP_DIRTY, + IOMMU_TEST_OP_MD_CHECK_IOTLB, }; enum { @@ -121,6 +122,10 @@ struct iommu_test_cmd { __aligned_u64 uptr; 
__aligned_u64 out_nr_dirty; } dirty; + struct { + __u32 id; + __u32 iotlb; + } check_iotlb; }; __u32 last; }; @@ -148,4 +153,22 @@ struct iommu_hwpt_selftest { __u32 iotlb; }; +/* Should not be equal to any defined value in enum iommu_hwpt_invalidate_data_type */ +#define IOMMU_HWPT_INVALIDATE_DATA_SELFTEST 0xdeadbeef +#define IOMMU_HWPT_INVALIDATE_DATA_SELFTEST_INVALID 0xdadbeef + +/** + * struct iommu_hwpt_invalidate_selftest - Invalidation data for Mock driver + * (IOMMU_HWPT_INVALIDATE_DATA_SELFTEST) + * @flags: Invalidate flags + * @iotlb_id: Invalidate iotlb entry index + * + * If IOMMU_TEST_INVALIDATE_FLAG_ALL is set in @flags, @iotlb_id will be ignored + */ +struct iommu_hwpt_invalidate_selftest { +#define IOMMU_TEST_INVALIDATE_FLAG_ALL (1 << 0) + __u32 flags; + __u32 iotlb_id; +}; + #endif diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index c9091e46d208abeea14aea1c649a016c39a077ba..39b32932c61ee4e924e24ab18fc05ec4149829c9 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -322,6 +322,7 @@ union ucmd_buffer { struct iommu_hw_info info; struct iommu_hwpt_alloc hwpt; struct iommu_hwpt_get_dirty_bitmap get_dirty_bitmap; + struct iommu_hwpt_invalidate cache; struct iommu_hwpt_set_dirty_tracking set_dirty_tracking; struct iommu_ioas_alloc alloc; struct iommu_ioas_allow_iovas allow_iovas; @@ -360,6 +361,8 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = { __reserved), IOCTL_OP(IOMMU_HWPT_GET_DIRTY_BITMAP, iommufd_hwpt_get_dirty_bitmap, struct iommu_hwpt_get_dirty_bitmap, data), + IOCTL_OP(IOMMU_HWPT_INVALIDATE, iommufd_hwpt_invalidate, + struct iommu_hwpt_invalidate, __reserved), IOCTL_OP(IOMMU_HWPT_SET_DIRTY_TRACKING, iommufd_hwpt_set_dirty_tracking, struct iommu_hwpt_set_dirty_tracking, __reserved), IOCTL_OP(IOMMU_IOAS_ALLOC, iommufd_ioas_alloc_ioctl, diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index
022ef8f55088a6b1e7d452ad4260510cca5bb303..d9e9920c7eba413eaf25b7840eefdf36a3999a9e 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -25,6 +25,19 @@ static struct iommu_domain_ops domain_nested_ops; size_t iommufd_test_memory_limit = 65536; +struct mock_bus_type { + struct bus_type bus; + struct notifier_block nb; +}; + +static struct mock_bus_type iommufd_mock_bus_type = { + .bus = { + .name = "iommufd_mock", + }, +}; + +static atomic_t mock_dev_num; + enum { MOCK_DIRTY_TRACK = 1, MOCK_IO_PAGE_SIZE = PAGE_SIZE / 2, @@ -437,6 +450,8 @@ static struct iommu_device mock_iommu_device = { static struct iommu_device *mock_probe_device(struct device *dev) { + if (dev->bus != &iommufd_mock_bus_type.bus) + return ERR_PTR(-ENODEV); return &mock_iommu_device; } @@ -473,9 +488,59 @@ static void mock_domain_free_nested(struct iommu_domain *domain) kfree(mock_nested); } +static int +mock_domain_cache_invalidate_user(struct iommu_domain *domain, + struct iommu_user_data_array *array) +{ + struct mock_iommu_domain_nested *mock_nested = + container_of(domain, struct mock_iommu_domain_nested, domain); + struct iommu_hwpt_invalidate_selftest inv; + u32 processed = 0; + int i = 0, j; + int rc = 0; + + if (array->type != IOMMU_HWPT_INVALIDATE_DATA_SELFTEST) { + rc = -EINVAL; + goto out; + } + + for ( ; i < array->entry_num; i++) { + rc = iommu_copy_struct_from_user_array(&inv, array, + IOMMU_HWPT_INVALIDATE_DATA_SELFTEST, + i, iotlb_id); + if (rc) + break; + + if (inv.flags & ~IOMMU_TEST_INVALIDATE_FLAG_ALL) { + rc = -EOPNOTSUPP; + break; + } + + if (inv.iotlb_id > MOCK_NESTED_DOMAIN_IOTLB_ID_MAX) { + rc = -EINVAL; + break; + } + + if (inv.flags & IOMMU_TEST_INVALIDATE_FLAG_ALL) { + /* Invalidate all mock iotlb entries and ignore iotlb_id */ + for (j = 0; j < MOCK_NESTED_DOMAIN_IOTLB_NUM; j++) + mock_nested->iotlb[j] = 0; + } else { + mock_nested->iotlb[inv.iotlb_id] = 0; + } + + processed++; + } + +out: + array->entry_num = processed; + return rc; 
+} + static struct iommu_domain_ops domain_nested_ops = { .free = mock_domain_free_nested, .attach_dev = mock_domain_nop_attach, + .cache_invalidate_user = mock_domain_cache_invalidate_user, }; static inline struct iommufd_hw_pagetable * @@ -526,19 +591,6 @@ get_md_pagetable_nested(struct iommufd_ucmd *ucmd, u32 mockpt_id, return hwpt; } -struct mock_bus_type { - struct bus_type bus; - struct notifier_block nb; -}; - -static struct mock_bus_type iommufd_mock_bus_type = { - .bus = { - .name = "iommufd_mock", - }, -}; - -static atomic_t mock_dev_num; - static void mock_dev_release(struct device *dev) { struct mock_dev *mdev = container_of(dev, struct mock_dev, dev); @@ -793,6 +845,28 @@ static int iommufd_test_md_check_refs(struct iommufd_ucmd *ucmd, return 0; } +static int iommufd_test_md_check_iotlb(struct iommufd_ucmd *ucmd, + u32 mockpt_id, unsigned int iotlb_id, + u32 iotlb) +{ + struct mock_iommu_domain_nested *mock_nested; + struct iommufd_hw_pagetable *hwpt; + int rc = 0; + + hwpt = get_md_pagetable_nested(ucmd, mockpt_id, &mock_nested); + if (IS_ERR(hwpt)) + return PTR_ERR(hwpt); + + mock_nested = container_of(hwpt->domain, + struct mock_iommu_domain_nested, domain); + + if (iotlb_id > MOCK_NESTED_DOMAIN_IOTLB_ID_MAX || + mock_nested->iotlb[iotlb_id] != iotlb) + rc = -EINVAL; + iommufd_put_object(ucmd->ictx, &hwpt->obj); + return rc; +} + struct selftest_access { struct iommufd_access *access; struct file *file; @@ -1274,6 +1348,10 @@ int iommufd_test(struct iommufd_ucmd *ucmd) return iommufd_test_md_check_refs( ucmd, u64_to_user_ptr(cmd->check_refs.uptr), cmd->check_refs.length, cmd->check_refs.refs); + case IOMMU_TEST_OP_MD_CHECK_IOTLB: + return iommufd_test_md_check_iotlb(ucmd, cmd->id, + cmd->check_iotlb.id, + cmd->check_iotlb.iotlb); case IOMMU_TEST_OP_CREATE_ACCESS: return iommufd_test_create_access(ucmd, cmd->id, cmd->create_access.flags); diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 
75279500a4a824f4246bd3fec4fdfb22b1c2cccc..7abe9e85a570632a74080ba275b44e14882d6cec 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -863,16 +863,11 @@ static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain, static struct iommu_device *mtk_iommu_probe_device(struct device *dev) { struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); - struct mtk_iommu_data *data; + struct mtk_iommu_data *data = dev_iommu_priv_get(dev); struct device_link *link; struct device *larbdev; unsigned int larbid, larbidx, i; - if (!fwspec || fwspec->ops != &mtk_iommu_ops) - return ERR_PTR(-ENODEV); /* Not a iommu client device */ - - data = dev_iommu_priv_get(dev); - if (!MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) return &data->iommu; diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index 67e044c1a7d93bc7e8c398445fd867008d349154..25b41222abaec109b4c87a8feba380401ad61a69 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -481,9 +481,6 @@ static struct iommu_device *mtk_iommu_v1_probe_device(struct device *dev) idx++; } - if (!fwspec || fwspec->ops != &mtk_iommu_v1_ops) - return ERR_PTR(-ENODEV); /* Not a iommu client device */ - data = dev_iommu_priv_get(dev); /* Link the consumer device with the smi-larb device(supplier) */ diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index 35ba090f3b5e24e615e7a604f18fb0d532706a6a..719652b608407ad905721d676ff7a0c245e76ee0 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -17,8 +17,6 @@ #include #include -#define NO_IOMMU 1 - static int of_iommu_xlate(struct device *dev, struct of_phandle_args *iommu_spec) { @@ -29,7 +27,7 @@ static int of_iommu_xlate(struct device *dev, ops = iommu_ops_from_fwnode(fwnode); if ((ops && !ops->of_xlate) || !of_device_is_available(iommu_spec->np)) - return NO_IOMMU; + return -ENODEV; ret = iommu_fwspec_init(dev, &iommu_spec->np->fwnode, ops); if (ret) @@ -61,7 +59,7 @@ static int 
of_iommu_configure_dev_id(struct device_node *master_np, "iommu-map-mask", &iommu_spec.np, iommu_spec.args); if (err) - return err == -ENODEV ? NO_IOMMU : err; + return err; err = of_iommu_xlate(dev, &iommu_spec); of_node_put(iommu_spec.np); @@ -72,7 +70,7 @@ static int of_iommu_configure_dev(struct device_node *master_np, struct device *dev) { struct of_phandle_args iommu_spec; - int err = NO_IOMMU, idx = 0; + int err = -ENODEV, idx = 0; while (!of_parse_phandle_with_args(master_np, "iommus", "#iommu-cells", @@ -107,16 +105,21 @@ static int of_iommu_configure_device(struct device_node *master_np, of_iommu_configure_dev(master_np, dev); } -const struct iommu_ops *of_iommu_configure(struct device *dev, - struct device_node *master_np, - const u32 *id) +/* + * Returns: + * 0 on success, an iommu was configured + * -ENODEV if the device does not have any IOMMU + * -EPROBE_DEFER if probing should be tried again + * -errno fatal errors + */ +int of_iommu_configure(struct device *dev, struct device_node *master_np, + const u32 *id) { - const struct iommu_ops *ops = NULL; struct iommu_fwspec *fwspec; - int err = NO_IOMMU; + int err; if (!master_np) - return NULL; + return -ENODEV; /* Serialise to make dev->iommu stable under our potential fwspec */ mutex_lock(&iommu_probe_device_lock); @@ -124,7 +127,7 @@ const struct iommu_ops *of_iommu_configure(struct device *dev, if (fwspec) { if (fwspec->ops) { mutex_unlock(&iommu_probe_device_lock); - return fwspec->ops; + return 0; } /* In the deferred case, start again from scratch */ iommu_fwspec_free(dev); @@ -147,36 +150,21 @@ const struct iommu_ops *of_iommu_configure(struct device *dev, } else { err = of_iommu_configure_device(master_np, dev, id); } - - /* - * Two success conditions can be represented by non-negative err here: - * >0 : there is no IOMMU, or one was unavailable for non-fatal reasons - * 0 : we found an IOMMU, and dev->fwspec is initialised appropriately - * <0 : any actual error - */ - if (!err) { - /* The
fwspec pointer changed, read it again */ - fwspec = dev_iommu_fwspec_get(dev); - ops = fwspec->ops; - } mutex_unlock(&iommu_probe_device_lock); - /* - * If we have reason to believe the IOMMU driver missed the initial - * probe for dev, replay it to get things in order. - */ - if (!err && dev->bus) - err = iommu_probe_device(dev); - - /* Ignore all other errors apart from EPROBE_DEFER */ - if (err == -EPROBE_DEFER) { - ops = ERR_PTR(err); - } else if (err < 0) { - dev_dbg(dev, "Adding to IOMMU failed: %d\n", err); - ops = NULL; - } + if (err == -ENODEV || err == -EPROBE_DEFER) + return err; + if (err) + goto err_log; - return ops; + err = iommu_probe_device(dev); + if (err) + goto err_log; + return 0; + +err_log: + dev_dbg(dev, "Adding to IOMMU failed: %pe\n", ERR_PTR(err)); + return err; } static enum iommu_resv_type __maybe_unused @@ -260,7 +248,14 @@ void of_iommu_get_resv_regions(struct device *dev, struct list_head *list) phys_addr_t iova; size_t length; + if (of_dma_is_coherent(dev->of_node)) + prot |= IOMMU_CACHE; + maps = of_translate_dma_region(np, maps, &iova, &length); + if (length == 0) { + dev_warn(dev, "Cannot reserve IOVA region of 0 size\n"); + continue; + } type = iommu_resv_region_get_type(dev, &phys, iova, length); region = iommu_alloc_resv_region(iova, length, prot, type, diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index c66b070841dd41e0c322f12515c7d8f919e5bd16..c9528065a59afac738a6f06ba89ef11c90082a72 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -1719,7 +1719,6 @@ static void omap_iommu_release_device(struct device *dev) if (!dev->of_node || !arch_data) return; - dev_iommu_priv_set(dev, NULL); kfree(arch_data); } diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c index 2eb9fb46703b3ae3d836b6c757a511f37d5998e5..537359f109979b703707ff52d398c61866adb73c 100644 --- a/drivers/iommu/sprd-iommu.c +++ b/drivers/iommu/sprd-iommu.c @@ -385,13 +385,7 @@ static phys_addr_t 
sprd_iommu_iova_to_phys(struct iommu_domain *domain, static struct iommu_device *sprd_iommu_probe_device(struct device *dev) { - struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); - struct sprd_iommu_device *sdev; - - if (!fwspec || fwspec->ops != &sprd_iommu_ops) - return ERR_PTR(-ENODEV); - - sdev = dev_iommu_priv_get(dev); + struct sprd_iommu_device *sdev = dev_iommu_priv_get(dev); return &sdev->iommu; } diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index 379ebe03efb6d45b42afd8a63b4fcb830bb37903..34db37fd9675cd98403633547b34663c2241821d 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -843,7 +843,7 @@ static int viommu_map_pages(struct iommu_domain *domain, unsigned long iova, .flags = cpu_to_le32(flags), }; - ret = viommu_send_req_sync(vdomain->viommu, &map, sizeof(map)); + ret = viommu_add_req(vdomain->viommu, &map, sizeof(map)); if (ret) { viommu_del_mappings(vdomain, iova, end); return ret; @@ -912,6 +912,33 @@ static void viommu_iotlb_sync(struct iommu_domain *domain, viommu_sync_req(vdomain->viommu); } +static int viommu_iotlb_sync_map(struct iommu_domain *domain, + unsigned long iova, size_t size) +{ + struct viommu_domain *vdomain = to_viommu_domain(domain); + + /* + * May be called before the viommu is initialized including + * while creating direct mapping + */ + if (!vdomain->nr_endpoints) + return 0; + return viommu_sync_req(vdomain->viommu); +} + +static void viommu_flush_iotlb_all(struct iommu_domain *domain) +{ + struct viommu_domain *vdomain = to_viommu_domain(domain); + + /* + * May be called before the viommu is initialized including + * while creating direct mapping + */ + if (!vdomain->nr_endpoints) + return; + viommu_sync_req(vdomain->viommu); +} + static void viommu_get_resv_regions(struct device *dev, struct list_head *head) { struct iommu_resv_region *entry, *new_entry, *msi = NULL; @@ -969,9 +996,6 @@ static struct iommu_device *viommu_probe_device(struct device *dev) struct 
viommu_dev *viommu = NULL; struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); - if (!fwspec || fwspec->ops != &viommu_ops) - return ERR_PTR(-ENODEV); - viommu = viommu_get_by_fwnode(fwspec->iommu_fwnode); if (!viommu) return ERR_PTR(-ENODEV); @@ -1037,6 +1061,8 @@ static bool viommu_capable(struct device *dev, enum iommu_cap cap) switch (cap) { case IOMMU_CAP_CACHE_COHERENCY: return true; + case IOMMU_CAP_DEFERRED_FLUSH: + return true; default: return false; } @@ -1057,7 +1083,9 @@ static struct iommu_ops viommu_ops = { .map_pages = viommu_map_pages, .unmap_pages = viommu_unmap_pages, .iova_to_phys = viommu_iova_to_phys, + .flush_iotlb_all = viommu_flush_iotlb_all, .iotlb_sync = viommu_iotlb_sync, + .iotlb_sync_map = viommu_iotlb_sync_map, .free = viommu_domain_free, } }; diff --git a/drivers/md/md.c b/drivers/md/md.c index 0a2bd72a6d76754ed4526b6a098b095476e1772b..2266358d807466f95d02b431d09ee39805dff5e8 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -8132,6 +8132,19 @@ static void status_unused(struct seq_file *seq) seq_printf(seq, "\n"); } +static void status_personalities(struct seq_file *seq) +{ + struct md_personality *pers; + + seq_puts(seq, "Personalities : "); + spin_lock(&pers_lock); + list_for_each_entry(pers, &pers_list, list) + seq_printf(seq, "[%s] ", pers->name); + + spin_unlock(&pers_lock); + seq_puts(seq, "\n"); +} + static int status_resync(struct seq_file *seq, struct mddev *mddev) { sector_t max_sectors, resync, res; @@ -8273,20 +8286,10 @@ static int status_resync(struct seq_file *seq, struct mddev *mddev) static void *md_seq_start(struct seq_file *seq, loff_t *pos) __acquires(&all_mddevs_lock) { - struct md_personality *pers; - - seq_puts(seq, "Personalities : "); - spin_lock(&pers_lock); - list_for_each_entry(pers, &pers_list, list) - seq_printf(seq, "[%s] ", pers->name); - - spin_unlock(&pers_lock); - seq_puts(seq, "\n"); seq->poll_event = atomic_read(&md_event_count); - spin_lock(&all_mddevs_lock); - return 
seq_list_start(&all_mddevs, *pos); + return seq_list_start_head(&all_mddevs, *pos); } static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos) @@ -8297,16 +8300,23 @@ static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void md_seq_stop(struct seq_file *seq, void *v) __releases(&all_mddevs_lock) { - status_unused(seq); spin_unlock(&all_mddevs_lock); } static int md_seq_show(struct seq_file *seq, void *v) { - struct mddev *mddev = list_entry(v, struct mddev, all_mddevs); + struct mddev *mddev; sector_t sectors; struct md_rdev *rdev; + if (v == &all_mddevs) { + status_personalities(seq); + if (list_empty(&all_mddevs)) + status_unused(seq); + return 0; + } + + mddev = list_entry(v, struct mddev, all_mddevs); if (!mddev_get(mddev)) return 0; @@ -8382,6 +8392,10 @@ static int md_seq_show(struct seq_file *seq, void *v) } spin_unlock(&mddev->lock); spin_lock(&all_mddevs_lock); + + if (mddev == list_last_entry(&all_mddevs, struct mddev, all_mddevs)) + status_unused(seq); + if (atomic_dec_and_test(&mddev->active)) __mddev_put(mddev); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index aaa434f0c17515f31519199e94468f92ff96b57d..24f0d799fd98ed318f2f1d2fc7b682d5ebf77e4c 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1968,12 +1968,12 @@ static void end_sync_write(struct bio *bio) } static int r1_sync_page_io(struct md_rdev *rdev, sector_t sector, - int sectors, struct page *page, int rw) + int sectors, struct page *page, blk_opf_t rw) { if (sync_page_io(rdev, sector, sectors << 9, page, rw, false)) /* success */ return 1; - if (rw == WRITE) { + if (rw == REQ_OP_WRITE) { set_bit(WriteErrorSeen, &rdev->flags); if (!test_and_set_bit(WantReplacement, &rdev->flags)) @@ -2090,7 +2090,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio) rdev = conf->mirrors[d].rdev; if (r1_sync_page_io(rdev, sect, s, pages[idx], - WRITE) == 0) { + REQ_OP_WRITE) == 0) { r1_bio->bios[d]->bi_end_io = NULL; rdev_dec_pending(rdev, mddev); } @@ 
-2105,7 +2105,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio) rdev = conf->mirrors[d].rdev; if (r1_sync_page_io(rdev, sect, s, pages[idx], - READ) != 0) + REQ_OP_READ) != 0) atomic_add(s, &rdev->corrected_errors); } sectors -= s; @@ -2321,7 +2321,7 @@ static void fix_read_error(struct r1conf *conf, struct r1bio *r1_bio) !test_bit(Faulty, &rdev->flags)) { atomic_inc(&rdev->nr_pending); r1_sync_page_io(rdev, sect, s, - conf->tmppage, WRITE); + conf->tmppage, REQ_OP_WRITE); rdev_dec_pending(rdev, mddev); } } @@ -2335,7 +2335,7 @@ static void fix_read_error(struct r1conf *conf, struct r1bio *r1_bio) !test_bit(Faulty, &rdev->flags)) { atomic_inc(&rdev->nr_pending); if (r1_sync_page_io(rdev, sect, s, - conf->tmppage, READ)) { + conf->tmppage, REQ_OP_READ)) { atomic_add(s, &rdev->corrected_errors); pr_info("md/raid1:%s: read error corrected (%d sectors at %llu on %pg)\n", mdname(mddev), s, diff --git a/drivers/media/pci/netup_unidvb/netup_unidvb_i2c.c b/drivers/media/pci/netup_unidvb/netup_unidvb_i2c.c index bd38ce4442325314162a26621f919a4dce7a86a3..46676f2c89c72766844f26efbf8543e731122790 100644 --- a/drivers/media/pci/netup_unidvb/netup_unidvb_i2c.c +++ b/drivers/media/pci/netup_unidvb/netup_unidvb_i2c.c @@ -289,7 +289,7 @@ static const struct i2c_algorithm netup_i2c_algorithm = { static const struct i2c_adapter netup_i2c_adapter = { .owner = THIS_MODULE, .name = NETUP_UNIDVB_NAME, - .class = I2C_CLASS_HWMON | I2C_CLASS_SPD, + .class = I2C_CLASS_HWMON, .algo = &netup_i2c_algorithm, }; diff --git a/drivers/media/pci/solo6x10/solo6x10-offsets.h b/drivers/media/pci/solo6x10/solo6x10-offsets.h index f414ee1316f29ca3feecbd0253c585c2adce8cb3..fdbb817e63601c032b312ab83a6a810cfbf71c6c 100644 --- a/drivers/media/pci/solo6x10/solo6x10-offsets.h +++ b/drivers/media/pci/solo6x10/solo6x10-offsets.h @@ -57,16 +57,16 @@ #define SOLO_MP4E_EXT_ADDR(__solo) \ (SOLO_EREF_EXT_ADDR(__solo) + SOLO_EREF_EXT_AREA(__solo)) #define SOLO_MP4E_EXT_SIZE(__solo) \ - max((__solo->nr_chans * 
0x00080000), \ - min(((__solo->sdram_size - SOLO_MP4E_EXT_ADDR(__solo)) - \ - __SOLO_JPEG_MIN_SIZE(__solo)), 0x00ff0000)) + clamp(__solo->sdram_size - SOLO_MP4E_EXT_ADDR(__solo) - \ + __SOLO_JPEG_MIN_SIZE(__solo), \ + __solo->nr_chans * 0x00080000, 0x00ff0000) #define __SOLO_JPEG_MIN_SIZE(__solo) (__solo->nr_chans * 0x00080000) #define SOLO_JPEG_EXT_ADDR(__solo) \ (SOLO_MP4E_EXT_ADDR(__solo) + SOLO_MP4E_EXT_SIZE(__solo)) #define SOLO_JPEG_EXT_SIZE(__solo) \ - max(__SOLO_JPEG_MIN_SIZE(__solo), \ - min((__solo->sdram_size - SOLO_JPEG_EXT_ADDR(__solo)), 0x00ff0000)) + clamp(__solo->sdram_size - SOLO_JPEG_EXT_ADDR(__solo), \ + __SOLO_JPEG_MIN_SIZE(__solo), 0x00ff0000) #define SOLO_SDRAM_END(__solo) \ (SOLO_JPEG_EXT_ADDR(__solo) + SOLO_JPEG_EXT_SIZE(__solo)) diff --git a/drivers/memory/tegra/tegra186.c b/drivers/memory/tegra/tegra186.c index 0ff014a9d3cd420081fb16b27c870d505a583146..1b3183951bfe5942f0a096e289e8e87b93b3894f 100644 --- a/drivers/memory/tegra/tegra186.c +++ b/drivers/memory/tegra/tegra186.c @@ -114,9 +114,12 @@ static void tegra186_mc_client_sid_override(struct tegra_mc *mc, static int tegra186_mc_probe_device(struct tegra_mc *mc, struct device *dev) { #if IS_ENABLED(CONFIG_IOMMU_API) - struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct of_phandle_args args; unsigned int i, index = 0; + u32 sid; + + if (!tegra_dev_iommu_get_stream_id(dev, &sid)) + return 0; while (!of_parse_phandle_with_args(dev->of_node, "interconnects", "#interconnect-cells", index, &args)) { @@ -124,11 +127,10 @@ static int tegra186_mc_probe_device(struct tegra_mc *mc, struct device *dev) for (i = 0; i < mc->soc->num_clients; i++) { const struct tegra_mc_client *client = &mc->soc->clients[i]; - if (client->id == args.args[0]) { - u32 sid = fwspec->ids[0] & MC_SID_STREAMID_OVERRIDE_MASK; - - tegra186_mc_client_sid_override(mc, client, sid); - } + if (client->id == args.args[0]) + tegra186_mc_client_sid_override( + mc, client, + sid & MC_SID_STREAMID_OVERRIDE_MASK); } } diff 
--git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index a5dcd7a134682e6154f2fe8bc7aec5629ebf45c3..572333ead5fb8b002b87957dfe1dc9ea26330efb 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -242,7 +243,7 @@ static const struct i2c_device_id at24_ids[] = { }; MODULE_DEVICE_TABLE(i2c, at24_ids); -static const struct of_device_id at24_of_match[] = { +static const struct of_device_id __maybe_unused at24_of_match[] = { { .compatible = "atmel,24c00", .data = &at24_data_24c00 }, { .compatible = "atmel,24c01", .data = &at24_data_24c01 }, { .compatible = "atmel,24cs01", .data = &at24_data_24cs01 }, @@ -835,7 +836,7 @@ static struct i2c_driver at24_driver = { .driver = { .name = "at24", .pm = &at24_pm_ops, - .of_match_table = at24_of_match, + .of_match_table = of_match_ptr(at24_of_match), .acpi_match_table = ACPI_PTR(at24_acpi_ids), }, .probe = at24_probe, diff --git a/drivers/net/amt.c b/drivers/net/amt.c index 53415e83821ce3a21f5b02d77a360cfad92221ac..68e79b1272f6b95fa803ac0d571a164654af4ac7 100644 --- a/drivers/net/amt.c +++ b/drivers/net/amt.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include @@ -80,11 +80,11 @@ static struct mld2_grec mldv2_zero_grec; static struct amt_skb_cb *amt_skb_cb(struct sk_buff *skb) { - BUILD_BUG_ON(sizeof(struct amt_skb_cb) + sizeof(struct qdisc_skb_cb) > + BUILD_BUG_ON(sizeof(struct amt_skb_cb) + sizeof(struct tc_skb_cb) > sizeof_field(struct sk_buff, cb)); return (struct amt_skb_cb *)((void *)skb->cb + - sizeof(struct qdisc_skb_cb)); + sizeof(struct tc_skb_cb)); } static void __amt_source_gc_work(void) diff --git a/drivers/net/can/c_can/c_can_platform.c b/drivers/net/can/c_can/c_can_platform.c index f44ba2600415f639b0d80958414201cf16ed9a38..e2ec69aa46e53154217d75e09d5fc07c212ee73f 100644 --- a/drivers/net/can/c_can/c_can_platform.c +++ b/drivers/net/can/c_can/c_can_platform.c @@ 
-30,9 +30,9 @@ #include #include #include +#include #include #include -#include #include #include @@ -259,22 +259,13 @@ static int c_can_plat_probe(struct platform_device *pdev) void __iomem *addr; struct net_device *dev; struct c_can_priv *priv; - const struct of_device_id *match; struct resource *mem; int irq; struct clk *clk; const struct c_can_driver_data *drvdata; struct device_node *np = pdev->dev.of_node; - match = of_match_device(c_can_of_table, &pdev->dev); - if (match) { - drvdata = match->data; - } else if (pdev->id_entry->driver_data) { - drvdata = (struct c_can_driver_data *) - platform_get_device_id(pdev)->driver_data; - } else { - return -ENODEV; - } + drvdata = device_get_match_data(&pdev->dev); /* get the appropriate clk */ clk = devm_clk_get(&pdev->dev, NULL); diff --git a/drivers/net/can/flexcan/flexcan-core.c b/drivers/net/can/flexcan/flexcan-core.c index d15f85a40c1e5be5b703fbe9d27c104564f54f58..8ea7f2795551bb4867869a11858cfd50f3f73f67 100644 --- a/drivers/net/can/flexcan/flexcan-core.c +++ b/drivers/net/can/flexcan/flexcan-core.c @@ -23,11 +23,11 @@ #include #include #include -#include #include #include #include #include +#include #include #include @@ -2034,7 +2034,6 @@ MODULE_DEVICE_TABLE(platform, flexcan_id_table); static int flexcan_probe(struct platform_device *pdev) { - const struct of_device_id *of_id; const struct flexcan_devtype_data *devtype_data; struct net_device *dev; struct flexcan_priv *priv; @@ -2090,14 +2089,7 @@ static int flexcan_probe(struct platform_device *pdev) if (IS_ERR(regs)) return PTR_ERR(regs); - of_id = of_match_device(flexcan_of_match, &pdev->dev); - if (of_id) - devtype_data = of_id->data; - else if (platform_get_device_id(pdev)->driver_data) - devtype_data = (struct flexcan_devtype_data *) - platform_get_device_id(pdev)->driver_data; - else - return -ENODEV; + devtype_data = device_get_match_data(&pdev->dev); if ((devtype_data->quirks & FLEXCAN_QUIRK_SUPPORT_FD) && !((devtype_data->quirks & diff --git 
a/drivers/net/can/mscan/mpc5xxx_can.c b/drivers/net/can/mscan/mpc5xxx_can.c index 4837df6efa92685fbb632986dbb51bab67590de5..5b3d69c3b6b66fe1f4cb10a42db9120ebecdea6f 100644 --- a/drivers/net/can/mscan/mpc5xxx_can.c +++ b/drivers/net/can/mscan/mpc5xxx_can.c @@ -12,8 +12,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -290,7 +292,7 @@ static int mpc5xxx_can_probe(struct platform_device *ofdev) int irq, mscan_clksrc = 0; int err = -ENOMEM; - data = of_device_get_match_data(&ofdev->dev); + data = device_get_match_data(&ofdev->dev); if (!data) return -EINVAL; @@ -351,13 +353,11 @@ exit_unmap_mem: static void mpc5xxx_can_remove(struct platform_device *ofdev) { - const struct of_device_id *match; const struct mpc5xxx_can_data *data; struct net_device *dev = platform_get_drvdata(ofdev); struct mscan_priv *priv = netdev_priv(dev); - match = of_match_device(mpc5xxx_can_table, &ofdev->dev); - data = match ? match->data : NULL; + data = device_get_match_data(&ofdev->dev); unregister_mscandev(dev); if (data && data->put_clock) diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index abe58f103043360d268d48c4fa4cbd880e1b44b2..3722eaa84234ec90d8decaad66015b8fcc40dfe3 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -20,8 +20,8 @@ #include #include #include -#include #include +#include #include #include #include @@ -1726,8 +1726,7 @@ static int xcan_probe(struct platform_device *pdev) struct net_device *ndev; struct xcan_priv *priv; struct phy *transceiver; - const struct of_device_id *of_id; - const struct xcan_devtype_data *devtype = &xcan_axi_data; + const struct xcan_devtype_data *devtype; void __iomem *addr; int ret; int rx_max, tx_max; @@ -1741,9 +1740,7 @@ static int xcan_probe(struct platform_device *pdev) goto err; } - of_id = of_match_device(xcan_of_match, &pdev->dev); - if (of_id && of_id->data) - devtype = of_id->data; + devtype = device_get_match_data(&pdev->dev); 
hw_tx_max_property = devtype->flags & XCAN_FLAG_TX_MAILBOXES ? "tx-mailbox-count" : "tx-fifo-depth"; diff --git a/drivers/net/dsa/vitesse-vsc73xx-core.c b/drivers/net/dsa/vitesse-vsc73xx-core.c index dd50502e21229652fed0fcf73e2b73975bdf010c..ae70eac3be28f84ec3ad00910faf3b4dcbe6bddc 100644 --- a/drivers/net/dsa/vitesse-vsc73xx-core.c +++ b/drivers/net/dsa/vitesse-vsc73xx-core.c @@ -1135,6 +1135,8 @@ static int vsc73xx_gpio_probe(struct vsc73xx *vsc) vsc->gc.label = devm_kasprintf(vsc->dev, GFP_KERNEL, "VSC%04x", vsc->chipid); + if (!vsc->gc.label) + return -ENOMEM; vsc->gc.ngpio = 4; vsc->gc.owner = THIS_MODULE; vsc->gc.parent = vsc->dev; diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c index 068ed52b66c94dfd75d038c2578740d3c8664732..b3c81a2e9d4643bb818f38069c9bfefe5ea290f0 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c +++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c @@ -1490,7 +1490,7 @@ int cn23xx_get_vf_stats(struct octeon_device *oct, int vfidx, mbox_cmd.q_no = vfidx * oct->sriov_info.rings_per_vf; mbox_cmd.recv_len = 0; mbox_cmd.recv_status = 0; - mbox_cmd.fn = (octeon_mbox_callback_t)cn23xx_get_vf_stats_callback; + mbox_cmd.fn = cn23xx_get_vf_stats_callback; ctx.stats = stats; atomic_set(&ctx.status, 0); mbox_cmd.fn_arg = (void *)&ctx; diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c b/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c index dd5d80fee24f0f8175fd08488e19d5a752f7c9ea..d2fcb3da484e3de50af5d90732c4ebef5c3d508d 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c +++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c @@ -429,7 +429,7 @@ int cn23xx_octeon_pfvf_handshake(struct octeon_device *oct) mbox_cmd.q_no = 0; mbox_cmd.recv_len = 0; mbox_cmd.recv_status = 0; - mbox_cmd.fn = (octeon_mbox_callback_t)octeon_pfvf_hs_callback; + mbox_cmd.fn = octeon_pfvf_hs_callback; mbox_cmd.fn_arg = 
&status; octeon_mbox_write(oct, &mbox_cmd); diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.h b/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.h index d92bd7e164775b9523430a04fa5dddfa5bbbc929..9ac85d22c615714c8be71f2a1605e9518619f416 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.h @@ -57,7 +57,10 @@ union octeon_mbox_message { } s; }; -typedef void (*octeon_mbox_callback_t)(void *, void *, void *); +struct octeon_mbox_cmd; + +typedef void (*octeon_mbox_callback_t)(struct octeon_device *, + struct octeon_mbox_cmd *, void *); struct octeon_mbox_cmd { union octeon_mbox_message msg; diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h index 18a1c3b6d72c5e4f7320871f43c79cca23f90f91..c8f35d4de271add10d9b65ef0591578621eed7b3 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h @@ -5,6 +5,7 @@ #define _I40E_ADMINQ_CMD_H_ #include +#include /* This header file defines the i40e Admin Queue commands and is shared between * i40e Firmware and Software. 
diff --git a/drivers/net/ethernet/intel/i40e/i40e_diag.h b/drivers/net/ethernet/intel/i40e/i40e_diag.h index ece3a6b9a5c61e59d103713f5d6bb869e98b2574..ab20202a3da3ca73a92e70185481c35067fdb8d8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_diag.h +++ b/drivers/net/ethernet/intel/i40e/i40e_diag.h @@ -4,6 +4,7 @@ #ifndef _I40E_DIAG_H_ #define _I40E_DIAG_H_ +#include #include "i40e_adminq_cmd.h" /* forward-declare the HW struct for the compiler */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c index 4728ba34b0e34cc3f5741bfc62b2ba49e00c2b8f..76218f1cb45958f3963ed340997051846093ec94 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c @@ -506,6 +506,7 @@ u32 rpm2_get_lmac_fifo_len(void *rpmd, int lmac_id) rpm_t *rpm = rpmd; u8 num_lmacs; u32 fifo_len; + u16 max_lmac; lmac_info = rpm_read(rpm, 0, RPM2_CMRX_RX_LMACS); /* LMACs are divided into two groups and each group @@ -513,7 +514,11 @@ u32 rpm2_get_lmac_fifo_len(void *rpmd, int lmac_id) * Group0 lmac_id range {0..3} * Group1 lmac_id range {4..7} */ - fifo_len = rpm->mac_ops->fifo_len / 2; + max_lmac = (rpm_read(rpm, 0, CGX_CONST) >> 24) & 0xFF; + if (max_lmac > 4) + fifo_len = rpm->mac_ops->fifo_len / 2; + else + fifo_len = rpm->mac_ops->fifo_len; if (lmac_id < 4) { num_lmacs = hweight8(lmac_info & 0xF); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c index 4c98950380d536ed30d146b3a6eeac9b098a1fae..d231f4d2888beefe838fadf6933e05ade826fdeb 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c @@ -301,6 +301,7 @@ mlxsw_sp_acl_erp_table_alloc(struct mlxsw_sp_acl_erp_core *erp_core, unsigned long *p_index) { unsigned int num_rows, entry_size; + unsigned long index; /* We only allow allocations of entire rows */ if (num_erps % 
erp_core->num_erp_banks != 0) @@ -309,10 +310,11 @@ mlxsw_sp_acl_erp_table_alloc(struct mlxsw_sp_acl_erp_core *erp_core, entry_size = erp_core->erpt_entries_size[region_type]; num_rows = num_erps / erp_core->num_erp_banks; - *p_index = gen_pool_alloc(erp_core->erp_tables, num_rows * entry_size); - if (*p_index == 0) + index = gen_pool_alloc(erp_core->erp_tables, num_rows * entry_size); + if (!index) return -ENOBUFS; - *p_index -= MLXSW_SP_ACL_ERP_GENALLOC_OFFSET; + + *p_index = index - MLXSW_SP_ACL_ERP_GENALLOC_OFFSET; return 0; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index d50786b0a6ce47924c55a9fbc53200f50bd96335..50ea1eff02b2f713ee847a6ea8dffb2ca248ae57 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -681,13 +681,13 @@ static void mlxsw_sp_acl_tcam_region_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam_region *region) { + struct mlxsw_sp_acl_tcam *tcam = mlxsw_sp_acl_to_tcam(mlxsw_sp->acl); const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; ops->region_fini(mlxsw_sp, region->priv); mlxsw_sp_acl_tcam_region_disable(mlxsw_sp, region); mlxsw_sp_acl_tcam_region_free(mlxsw_sp, region); - mlxsw_sp_acl_tcam_region_id_put(region->group->tcam, - region->id); + mlxsw_sp_acl_tcam_region_id_put(tcam, region->id); kfree(region); } @@ -1564,6 +1564,8 @@ int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, tcam->max_groups = max_groups; tcam->max_group_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_GROUP_SIZE); + tcam->max_group_size = min_t(unsigned int, tcam->max_group_size, + MLXSW_REG_PAGT_ACL_MAX_NUM); err = ops->init(mlxsw_sp, tcam->priv, tcam); if (err) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 2c255ed9b8a9483da266c27cf8e9ddd8d85d2546..7164f9e6370fb76d91aad3c09f4a93f061daf1fc 100644 
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -11472,6 +11472,13 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, if (err) goto err_register_netevent_notifier; + mlxsw_sp->router->netdevice_nb.notifier_call = + mlxsw_sp_router_netdevice_event; + err = register_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp), + &mlxsw_sp->router->netdevice_nb); + if (err) + goto err_register_netdev_notifier; + mlxsw_sp->router->nexthop_nb.notifier_call = mlxsw_sp_nexthop_obj_event; err = register_nexthop_notifier(mlxsw_sp_net(mlxsw_sp), @@ -11487,22 +11494,15 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, if (err) goto err_register_fib_notifier; - mlxsw_sp->router->netdevice_nb.notifier_call = - mlxsw_sp_router_netdevice_event; - err = register_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp), - &mlxsw_sp->router->netdevice_nb); - if (err) - goto err_register_netdev_notifier; - return 0; -err_register_netdev_notifier: - unregister_fib_notifier(mlxsw_sp_net(mlxsw_sp), - &mlxsw_sp->router->fib_nb); err_register_fib_notifier: unregister_nexthop_notifier(mlxsw_sp_net(mlxsw_sp), &mlxsw_sp->router->nexthop_nb); err_register_nexthop_notifier: + unregister_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp), + &router->netdevice_nb); +err_register_netdev_notifier: unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb); err_register_netevent_notifier: unregister_inet6addr_validator_notifier(&router->inet6addr_valid_nb); @@ -11550,11 +11550,11 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) { struct mlxsw_sp_router *router = mlxsw_sp->router; - unregister_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp), - &router->netdevice_nb); unregister_fib_notifier(mlxsw_sp_net(mlxsw_sp), &router->fib_nb); unregister_nexthop_notifier(mlxsw_sp_net(mlxsw_sp), &router->nexthop_nb); + unregister_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp), + &router->netdevice_nb); unregister_netevent_notifier(&router->netevent_nb); 
unregister_inet6addr_validator_notifier(&router->inet6addr_valid_nb); unregister_inetaddr_validator_notifier(&router->inetaddr_valid_nb); diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c index 61d8bfd12d5fd50d99c1927dac230bdb049ebcae..55408f16fbbc4d4f50e57a0bb43de030dbd7d3f9 100644 --- a/drivers/net/ethernet/neterion/s2io.c +++ b/drivers/net/ethernet/neterion/s2io.c @@ -414,6 +414,7 @@ static const u64 fix_mac[] = { END_SIGN }; +MODULE_DESCRIPTION("Neterion 10GbE driver"); MODULE_LICENSE("GPL"); MODULE_VERSION(DRV_VERSION); diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c index 39d24e07f30670fc1af40ee988143ffb90990e32..5b69b9268c757fca7aa42545e6ac3d87143d2971 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -396,7 +396,7 @@ nla_put_failure: struct rtnl_link_ops rmnet_link_ops __read_mostly = { .kind = "rmnet", - .maxtype = __IFLA_RMNET_MAX, + .maxtype = IFLA_RMNET_MAX, .priv_size = sizeof(struct rmnet_priv), .setup = rmnet_vnd_setup, .validate = rmnet_rtnl_validate, diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 8649b3e90edb288d806998d0c97a82c858b0c947..0e3731f50fc2873dc3c4c06c16ffe1f4a8707e83 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1949,7 +1949,7 @@ static netdev_tx_t ravb_start_xmit(struct sk_buff *skb, struct net_device *ndev) struct ravb_tstamp_skb *ts_skb; struct ravb_tx_desc *desc; unsigned long flags; - u32 dma_addr; + dma_addr_t dma_addr; void *buffer; u32 entry; u32 len; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 9f89acf310502225f6a0951ffdcd190b935655e8..f155e4841c62bc707b13c70462837b7c810f8142 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ 
b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -267,6 +267,7 @@ struct stmmac_priv { u32 msg_enable; int wolopts; int wol_irq; + bool wol_irq_disabled; int clk_csr; struct timer_list eee_ctrl_timer; int lpi_irq; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index dd05437b51f918772cee3f696086ce3312d62675..42d27b97dd1d036e1410131060b65220b0ab2180 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -321,8 +321,9 @@ static int stmmac_ethtool_get_link_ksettings(struct net_device *dev, { struct stmmac_priv *priv = netdev_priv(dev); - if (priv->hw->pcs & STMMAC_PCS_RGMII || - priv->hw->pcs & STMMAC_PCS_SGMII) { + if (!(priv->plat->flags & STMMAC_FLAG_HAS_INTEGRATED_PCS) && + (priv->hw->pcs & STMMAC_PCS_RGMII || + priv->hw->pcs & STMMAC_PCS_SGMII)) { struct rgmii_adv adv; u32 supported, advertising, lp_advertising; @@ -407,8 +408,9 @@ stmmac_ethtool_set_link_ksettings(struct net_device *dev, { struct stmmac_priv *priv = netdev_priv(dev); - if (priv->hw->pcs & STMMAC_PCS_RGMII || - priv->hw->pcs & STMMAC_PCS_SGMII) { + if (!(priv->plat->flags & STMMAC_FLAG_HAS_INTEGRATED_PCS) && + (priv->hw->pcs & STMMAC_PCS_RGMII || + priv->hw->pcs & STMMAC_PCS_SGMII)) { /* Only support ANE */ if (cmd->base.autoneg != AUTONEG_ENABLE) return -EINVAL; @@ -830,10 +832,16 @@ static int stmmac_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) if (wol->wolopts) { pr_info("stmmac: wakeup enable\n"); device_set_wakeup_enable(priv->device, 1); - enable_irq_wake(priv->wol_irq); + /* Avoid unbalanced enable_irq_wake calls */ + if (priv->wol_irq_disabled) + enable_irq_wake(priv->wol_irq); + priv->wol_irq_disabled = false; } else { device_set_wakeup_enable(priv->device, 0); - disable_irq_wake(priv->wol_irq); + /* Avoid unbalanced disable_irq_wake calls */ + if (!priv->wol_irq_disabled) + disable_irq_wake(priv->wol_irq); + 
priv->wol_irq_disabled = true; } mutex_lock(&priv->lock); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 47de466e432c0545aba22d9dc2801d2fd733c7b5..a0e46369ae158bf51ddcbd562414e172d97af201 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3628,6 +3628,7 @@ static int stmmac_request_irq_multi_msi(struct net_device *dev) /* Request the Wake IRQ in case of another line * is used for WoL */ + priv->wol_irq_disabled = true; if (priv->wol_irq > 0 && priv->wol_irq != dev->irq) { int_name = priv->int_name_wol; sprintf(int_name, "%s:%s", dev->name, "wol"); @@ -4434,6 +4435,28 @@ dma_map_err: return NETDEV_TX_OK; } +/** + * stmmac_has_ip_ethertype() - Check if packet has IP ethertype + * @skb: socket buffer to check + * + * Check if a packet has an ethertype that will trigger the IP header checks + * and IP/TCP checksum engine of the stmmac core. + * + * Return: true if the ethertype can trigger the checksum engine, false + * otherwise + */ +static bool stmmac_has_ip_ethertype(struct sk_buff *skb) +{ + int depth = 0; + __be16 proto; + + proto = __vlan_get_protocol(skb, eth_header_parse_protocol(skb), + &depth); + + return (depth <= ETH_HLEN) && + (proto == htons(ETH_P_IP) || proto == htons(ETH_P_IPV6)); +} + /** * stmmac_xmit - Tx entry point of the driver * @skb : the socket buffer @@ -4498,9 +4521,13 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) /* DWMAC IPs can be synthesized to support tx coe only for a few tx * queues. In that case, checksum offloading for those queues that don't * support tx coe needs to fallback to software checksum calculation. + * + * Packets that won't trigger the COE e.g. most DSA-tagged packets will + * also have to be checksummed in software. 
*/ if (csum_insertion && - priv->plat->tx_queues_cfg[queue].coe_unsupported) { + (priv->plat->tx_queues_cfg[queue].coe_unsupported || + !stmmac_has_ip_ethertype(skb))) { if (unlikely(skb_checksum_help(skb))) goto dma_map_err; csum_insertion = !csum_insertion; @@ -5065,7 +5092,7 @@ static void stmmac_dispatch_skb_zc(struct stmmac_priv *priv, u32 queue, stmmac_rx_vlan(priv->dev, skb); skb->protocol = eth_type_trans(skb, priv->dev); - if (unlikely(!coe)) + if (unlikely(!coe) || !stmmac_has_ip_ethertype(skb)) skb_checksum_none_assert(skb); else skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -5588,7 +5615,7 @@ drain_data: skb->protocol = eth_type_trans(skb, priv->dev); - if (unlikely(!coe)) + if (unlikely(!coe) || !stmmac_has_ip_ethertype(skb)) skb_checksum_none_assert(skb); else skb->ip_summed = CHECKSUM_UNNECESSARY; diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index faa0561e988ecb1e8d866f2c9e9b27b109e474d0..9d2f4ac783e43502586b27283a4db73351ca0583 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -56,7 +56,7 @@ #define AM65_CPSW_MAX_PORTS 8 #define AM65_CPSW_MIN_PACKET_SIZE VLAN_ETH_ZLEN -#define AM65_CPSW_MAX_PACKET_SIZE (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN) +#define AM65_CPSW_MAX_PACKET_SIZE 2024 #define AM65_CPSW_REG_CTL 0x004 #define AM65_CPSW_REG_STAT_PORT_EN 0x014 @@ -2244,7 +2244,8 @@ am65_cpsw_nuss_init_port_ndev(struct am65_cpsw_common *common, u32 port_idx) eth_hw_addr_set(port->ndev, port->slave.mac_addr); port->ndev->min_mtu = AM65_CPSW_MIN_PACKET_SIZE; - port->ndev->max_mtu = AM65_CPSW_MAX_PACKET_SIZE; + port->ndev->max_mtu = AM65_CPSW_MAX_PACKET_SIZE - + (VLAN_ETH_HLEN + ETH_FCS_LEN); port->ndev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM | NETIF_F_HW_CSUM | diff --git a/drivers/net/ethernet/wangxun/Kconfig b/drivers/net/ethernet/wangxun/Kconfig index 23cd610bd3766c2b2785e94f1faa9da54990ec00..85cdbdd44fec70d1b20e348c38368f047107eec6 100644 --- 
a/drivers/net/ethernet/wangxun/Kconfig +++ b/drivers/net/ethernet/wangxun/Kconfig @@ -26,7 +26,7 @@ config NGBE tristate "Wangxun(R) GbE PCI Express adapters support" depends on PCI select LIBWX - select PHYLIB + select PHYLINK help This driver supports Wangxun(R) GbE PCI Express family of adapters. diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c index 23355cc408fd7b0bd6ad982ba6d6a069cf91d302..8706223a6e5aa9ceff3fa0b5076400bcaab06cd3 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c @@ -2769,4 +2769,5 @@ void wx_set_ring(struct wx *wx, u32 new_tx_count, } EXPORT_SYMBOL(wx_set_ring); +MODULE_DESCRIPTION("Common library for Wangxun(R) Ethernet drivers."); MODULE_LICENSE("GPL"); diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index aecaf5f44374f0a7eb92eb3db46f5a0ec24c5ee9..77e8250282a512ee5cf2f05d9bed4b604e5c210b 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -369,6 +369,12 @@ static int nsim_init_netdevsim_vf(struct netdevsim *ns) return err; } +static void nsim_exit_netdevsim(struct netdevsim *ns) +{ + nsim_udp_tunnels_info_destroy(ns->netdev); + mock_phc_destroy(ns->phc); +} + struct netdevsim * nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) { @@ -417,8 +423,7 @@ void nsim_destroy(struct netdevsim *ns) } rtnl_unlock(); if (nsim_dev_port_is_pf(ns->nsim_dev_port)) - nsim_udp_tunnels_info_destroy(dev); - mock_phc_destroy(ns->phc); + nsim_exit_netdevsim(ns); free_netdev(dev); } diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index d2aa3d0695e3367410f0b61bbe44e7b849e36d2c..81c20eb4b54b918517866a262404e3641988a66d 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -3338,8 +3338,10 @@ static int lan8814_probe(struct phy_device *phydev) #define LAN8841_ADC_CHANNEL_MASK 198 #define LAN8841_PTP_RX_PARSE_L2_ADDR_EN 370 #define 
LAN8841_PTP_RX_PARSE_IP_ADDR_EN 371 +#define LAN8841_PTP_RX_VERSION 374 #define LAN8841_PTP_TX_PARSE_L2_ADDR_EN 434 #define LAN8841_PTP_TX_PARSE_IP_ADDR_EN 435 +#define LAN8841_PTP_TX_VERSION 438 #define LAN8841_PTP_CMD_CTL 256 #define LAN8841_PTP_CMD_CTL_PTP_ENABLE BIT(2) #define LAN8841_PTP_CMD_CTL_PTP_DISABLE BIT(1) @@ -3383,6 +3385,12 @@ static int lan8841_config_init(struct phy_device *phydev) phy_write_mmd(phydev, KSZ9131RN_MMD_COMMON_CTRL_REG, LAN8841_PTP_RX_PARSE_IP_ADDR_EN, 0); + /* Disable checking for minorVersionPTP field */ + phy_write_mmd(phydev, KSZ9131RN_MMD_COMMON_CTRL_REG, + LAN8841_PTP_RX_VERSION, 0xff00); + phy_write_mmd(phydev, KSZ9131RN_MMD_COMMON_CTRL_REG, + LAN8841_PTP_TX_VERSION, 0xff00); + /* 100BT Clause 40 improvenent errata */ phy_write_mmd(phydev, LAN8841_MMD_ANALOG_REG, LAN8841_ANALOG_CONTROL_1, @@ -4839,6 +4847,7 @@ static struct phy_driver ksphy_driver[] = { .flags = PHY_POLL_CABLE_TEST, .driver_data = &ksz9131_type, .probe = kszphy_probe, + .soft_reset = genphy_soft_reset, .config_init = ksz9131_config_init, .config_intr = kszphy_config_intr, .config_aneg = ksz9131_config_aneg, diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c index 6fa679b36290ed528d942b8f3450217ec4a0b1bf..db39dec7f2471c6205db6c0158a229417d255876 100644 --- a/drivers/net/phy/sfp-bus.c +++ b/drivers/net/phy/sfp-bus.c @@ -151,10 +151,6 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, unsigned int br_min, br_nom, br_max; __ETHTOOL_DECLARE_LINK_MODE_MASK(modes) = { 0, }; - phylink_set(modes, Autoneg); - phylink_set(modes, Pause); - phylink_set(modes, Asym_Pause); - /* Decode the bitrate information to MBd */ br_min = br_nom = br_max = 0; if (id->base.br_nominal) { @@ -339,6 +335,10 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, } } + phylink_set(modes, Autoneg); + phylink_set(modes, Pause); + phylink_set(modes, Asym_Pause); + if (bus->sfp_quirk && bus->sfp_quirk->modes) 
bus->sfp_quirk->modes(id, modes, interfaces); diff --git a/drivers/net/slip/slhc.c b/drivers/net/slip/slhc.c index ba93bab948e09fcea7f7fed364ca244b219d79d5..18df7ca6619814681adcb879ebb790e5cbaea959 100644 --- a/drivers/net/slip/slhc.c +++ b/drivers/net/slip/slhc.c @@ -752,4 +752,5 @@ EXPORT_SYMBOL(slhc_compress); EXPORT_SYMBOL(slhc_uncompress); EXPORT_SYMBOL(slhc_toss); +MODULE_DESCRIPTION("Compression helpers for SLIP (serial line)"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c index e4280e37fec97fed5e0a793fd4be45f53b7e8bd8..0aba3569ccc0d4a19baa9b5b1f05cb03d47b20b9 100644 --- a/drivers/net/slip/slip.c +++ b/drivers/net/slip/slip.c @@ -1437,5 +1437,6 @@ out: } #endif +MODULE_DESCRIPTION("SLIP (serial line) protocol module"); MODULE_LICENSE("GPL"); MODULE_ALIAS_LDISC(N_SLIP); diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 3cb8aa19388415f8f7208f45827514f8c6299f28..d7ce4a1011ea2585bca21ab3fb86978f7fee364d 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -4295,10 +4295,11 @@ static int virtnet_find_vqs(struct virtnet_info *vi) { vq_callback_t **callbacks; struct virtqueue **vqs; - int ret = -ENOMEM; - int i, total_vqs; const char **names; + int ret = -ENOMEM; + int total_vqs; bool *ctx; + u16 i; /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by @@ -4335,8 +4336,8 @@ static int virtnet_find_vqs(struct virtnet_info *vi) for (i = 0; i < vi->max_queue_pairs; i++) { callbacks[rxq2vq(i)] = skb_recv_done; callbacks[txq2vq(i)] = skb_xmit_done; - sprintf(vi->rq[i].name, "input.%d", i); - sprintf(vi->sq[i].name, "output.%d", i); + sprintf(vi->rq[i].name, "input.%u", i); + sprintf(vi->sq[i].name, "output.%u", i); names[rxq2vq(i)] = vi->rq[i].name; names[txq2vq(i)] = vi->sq[i].name; if (ctx) diff --git a/drivers/net/wan/slic_ds26522.c b/drivers/net/wan/slic_ds26522.c index 
8a51cfcff99e5288e0a258a1172d2e06f688068b..cbb99fc5ea9fe7117053325428e66cdecb79f2bc 100644 --- a/drivers/net/wan/slic_ds26522.c +++ b/drivers/net/wan/slic_ds26522.c @@ -28,6 +28,7 @@ static struct spi_device *g_spi; +MODULE_DESCRIPTION("Slic Maxim DS26522 driver"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Zhao Qiang"); diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 88f760a7cbc35469e20be2d09f9b2cfb92b8362a..d7503aef599f04bec326900fe918a974e55bc5cc 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -463,12 +463,25 @@ static void xenvif_get_requests(struct xenvif_queue *queue, } for (shinfo->nr_frags = 0; nr_slots > 0 && shinfo->nr_frags < MAX_SKB_FRAGS; - shinfo->nr_frags++, gop++, nr_slots--) { + nr_slots--) { + if (unlikely(!txp->size)) { + unsigned long flags; + + spin_lock_irqsave(&queue->response_lock, flags); + make_tx_response(queue, txp, 0, XEN_NETIF_RSP_OKAY); + push_tx_responses(queue); + spin_unlock_irqrestore(&queue->response_lock, flags); + ++txp; + continue; + } + index = pending_index(queue->pending_cons++); pending_idx = queue->pending_ring[index]; xenvif_tx_create_map_op(queue, pending_idx, txp, txp == first ? 
extra_count : 0, gop); frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx); + ++shinfo->nr_frags; + ++gop; if (txp == first) txp = txfrags; @@ -481,20 +494,39 @@ static void xenvif_get_requests(struct xenvif_queue *queue, shinfo = skb_shinfo(nskb); frags = shinfo->frags; - for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots; - shinfo->nr_frags++, txp++, gop++) { + for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots; ++txp) { + if (unlikely(!txp->size)) { + unsigned long flags; + + spin_lock_irqsave(&queue->response_lock, flags); + make_tx_response(queue, txp, 0, + XEN_NETIF_RSP_OKAY); + push_tx_responses(queue); + spin_unlock_irqrestore(&queue->response_lock, + flags); + continue; + } + index = pending_index(queue->pending_cons++); pending_idx = queue->pending_ring[index]; xenvif_tx_create_map_op(queue, pending_idx, txp, 0, gop); frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx); + ++shinfo->nr_frags; + ++gop; } - skb_shinfo(skb)->frag_list = nskb; - } else if (nskb) { + if (shinfo->nr_frags) { + skb_shinfo(skb)->frag_list = nskb; + nskb = NULL; + } + } + + if (nskb) { /* A frag_list skb was allocated but it is no longer needed - * because enough slots were converted to copy ops above. + * because enough slots were converted to copy ops above or some + * were empty. 
*/ kfree_skb(nskb); } diff --git a/drivers/nvdimm/virtio_pmem.c b/drivers/nvdimm/virtio_pmem.c index a92eb172f0e7eb9a0feed5a05a7b887066c02ec2..4ceced5cefcf1d40d9eef0817c623bd798a49d79 100644 --- a/drivers/nvdimm/virtio_pmem.c +++ b/drivers/nvdimm/virtio_pmem.c @@ -29,12 +29,27 @@ static int init_vq(struct virtio_pmem *vpmem) return 0; }; +static int virtio_pmem_validate(struct virtio_device *vdev) +{ + struct virtio_shm_region shm_reg; + + if (virtio_has_feature(vdev, VIRTIO_PMEM_F_SHMEM_REGION) && + !virtio_get_shm_region(vdev, &shm_reg, (u8)VIRTIO_PMEM_SHMEM_REGION_ID) + ) { + dev_notice(&vdev->dev, "failed to get shared memory region %d\n", + VIRTIO_PMEM_SHMEM_REGION_ID); + __virtio_clear_bit(vdev, VIRTIO_PMEM_F_SHMEM_REGION); + } + return 0; +} + static int virtio_pmem_probe(struct virtio_device *vdev) { struct nd_region_desc ndr_desc = {}; struct nd_region *nd_region; struct virtio_pmem *vpmem; struct resource res; + struct virtio_shm_region shm_reg; int err = 0; if (!vdev->config->get) { @@ -57,10 +72,16 @@ static int virtio_pmem_probe(struct virtio_device *vdev) goto out_err; } - virtio_cread_le(vpmem->vdev, struct virtio_pmem_config, - start, &vpmem->start); - virtio_cread_le(vpmem->vdev, struct virtio_pmem_config, - size, &vpmem->size); + if (virtio_has_feature(vdev, VIRTIO_PMEM_F_SHMEM_REGION)) { + virtio_get_shm_region(vdev, &shm_reg, (u8)VIRTIO_PMEM_SHMEM_REGION_ID); + vpmem->start = shm_reg.addr; + vpmem->size = shm_reg.len; + } else { + virtio_cread_le(vpmem->vdev, struct virtio_pmem_config, + start, &vpmem->start); + virtio_cread_le(vpmem->vdev, struct virtio_pmem_config, + size, &vpmem->size); + } res.start = vpmem->start; res.end = vpmem->start + vpmem->size - 1; @@ -122,10 +143,17 @@ static void virtio_pmem_remove(struct virtio_device *vdev) virtio_reset_device(vdev); } +static unsigned int features[] = { + VIRTIO_PMEM_F_SHMEM_REGION, +}; + static struct virtio_driver virtio_pmem_driver = { + .feature_table = features, + .feature_table_size = 
ARRAY_SIZE(features), .driver.name = KBUILD_MODNAME, .driver.owner = THIS_MODULE, .id_table = id_table, + .validate = virtio_pmem_validate, .probe = virtio_pmem_probe, .remove = virtio_pmem_remove, }; diff --git a/drivers/nvme/common/keyring.c b/drivers/nvme/common/keyring.c index ee341b83eebaf553cbf91a045b048285d590157a..a5c0431c101cf3775509145e3bc7f12c6b64ccd0 100644 --- a/drivers/nvme/common/keyring.c +++ b/drivers/nvme/common/keyring.c @@ -111,7 +111,7 @@ static struct key *nvme_tls_psk_lookup(struct key *keyring, * should be preferred to 'generated' PSKs, * and SHA-384 should be preferred to SHA-256. */ -struct nvme_tls_psk_priority_list { +static struct nvme_tls_psk_priority_list { bool generated; enum nvme_tcp_tls_cipher cipher; } nvme_tls_psk_prio[] = { diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 0af61238708370d1fe11d59f091a42d8b7bce3ee..85ab0fcf9e886451fb070b75dcd53be4a4f88f62 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1740,13 +1740,13 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl, struct gendisk *disk, struct nvme_ns_head *head) { struct request_queue *queue = disk->queue; - u32 size = queue_logical_block_size(queue); + u32 max_discard_sectors; - if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(head, UINT_MAX)) - ctrl->max_discard_sectors = - nvme_lba_to_sect(head, ctrl->dmrsl); - - if (ctrl->max_discard_sectors == 0) { + if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(head, UINT_MAX)) { + max_discard_sectors = nvme_lba_to_sect(head, ctrl->dmrsl); + } else if (ctrl->oncs & NVME_CTRL_ONCS_DSM) { + max_discard_sectors = UINT_MAX; + } else { blk_queue_max_discard_sectors(queue, 0); return; } @@ -1754,14 +1754,22 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl, struct gendisk *disk, BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) < NVME_DSM_MAX_RANGES); - queue->limits.discard_granularity = size; - - /* If discard is already enabled, don't reset queue limits */ + /* + * 
If discard is already enabled, don't reset queue limits. + * + * This works around the fact that the block layer can't cope well with + * updating the hardware limits when overridden through sysfs. This is + * harmless because discard limits in NVMe are purely advisory. + */ if (queue->limits.max_discard_sectors) return; - blk_queue_max_discard_sectors(queue, ctrl->max_discard_sectors); - blk_queue_max_discard_segments(queue, ctrl->max_discard_segments); + blk_queue_max_discard_sectors(queue, max_discard_sectors); + if (ctrl->dmrl) + blk_queue_max_discard_segments(queue, ctrl->dmrl); + else + blk_queue_max_discard_segments(queue, NVME_DSM_MAX_RANGES); + queue->limits.discard_granularity = queue_logical_block_size(queue); if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES) blk_queue_max_write_zeroes_sectors(queue, UINT_MAX); @@ -2930,14 +2938,6 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl) struct nvme_id_ctrl_nvm *id; int ret; - if (ctrl->oncs & NVME_CTRL_ONCS_DSM) { - ctrl->max_discard_sectors = UINT_MAX; - ctrl->max_discard_segments = NVME_DSM_MAX_RANGES; - } else { - ctrl->max_discard_sectors = 0; - ctrl->max_discard_segments = 0; - } - /* * Even though NVMe spec explicitly states that MDTS is not applicable * to the write-zeroes, we are cautious and limit the size to the @@ -2967,8 +2967,7 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl) if (ret) goto free_data; - if (id->dmrl) - ctrl->max_discard_segments = id->dmrl; + ctrl->dmrl = id->dmrl; ctrl->dmrsl = le32_to_cpu(id->dmrsl); if (id->wzsl) ctrl->max_zeroes_sectors = nvme_mps_to_sectors(ctrl, id->wzsl); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 4be7f6822966db94fdd9964ed92640eedafede91..030c8081824065e7fa3d14e1a4918f1c94080565 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -303,14 +303,13 @@ struct nvme_ctrl { u32 max_hw_sectors; u32 max_segments; u32 max_integrity_segments; - u32 max_discard_sectors; - u32 
max_discard_segments; u32 max_zeroes_sectors; #ifdef CONFIG_BLK_DEV_ZONED u32 max_zone_append; #endif u16 crdt[3]; u16 oncs; + u8 dmrl; u32 dmrsl; u16 oacs; u16 sqsize; @@ -932,6 +931,10 @@ extern struct device_attribute dev_attr_ana_grpid; extern struct device_attribute dev_attr_ana_state; extern struct device_attribute subsys_attr_iopolicy; +static inline bool nvme_disk_is_ns_head(struct gendisk *disk) +{ + return disk->fops == &nvme_ns_head_ops; +} #else #define multipath false static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl) @@ -1009,6 +1012,10 @@ static inline void nvme_mpath_start_request(struct request *rq) static inline void nvme_mpath_end_request(struct request *rq) { } +static inline bool nvme_disk_is_ns_head(struct gendisk *disk) +{ + return false; +} #endif /* CONFIG_NVME_MULTIPATH */ int nvme_revalidate_zones(struct nvme_ns *ns); @@ -1037,7 +1044,10 @@ static inline int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf) static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev) { - return dev_to_disk(dev)->private_data; + struct gendisk *disk = dev_to_disk(dev); + + WARN_ON(nvme_disk_is_ns_head(disk)); + return disk->private_data; } #ifdef CONFIG_NVME_HWMON diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 61af7ff1a9d6ba96f56f67ab6cdb3c5b5bf9be3b..c1d6357ec98a0107acacdae47024c3110b3cfb9f 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1284,6 +1284,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req) struct request *abort_req; struct nvme_command cmd = { }; u32 csts = readl(dev->bar + NVME_REG_CSTS); + u8 opcode; /* If PCI error recovery process is happening, we cannot reset or * the recovery mechanism will surely fail. 
@@ -1310,8 +1311,8 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req) if (blk_mq_rq_state(req) != MQ_RQ_IN_FLIGHT) { dev_warn(dev->ctrl.device, - "I/O %d QID %d timeout, completion polled\n", - req->tag, nvmeq->qid); + "I/O tag %d (%04x) QID %d timeout, completion polled\n", + req->tag, nvme_cid(req), nvmeq->qid); return BLK_EH_DONE; } @@ -1327,8 +1328,8 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req) fallthrough; case NVME_CTRL_DELETING: dev_warn_ratelimited(dev->ctrl.device, - "I/O %d QID %d timeout, disable controller\n", - req->tag, nvmeq->qid); + "I/O tag %d (%04x) QID %d timeout, disable controller\n", + req->tag, nvme_cid(req), nvmeq->qid); nvme_req(req)->flags |= NVME_REQ_CANCELLED; nvme_dev_disable(dev, true); return BLK_EH_DONE; @@ -1343,10 +1344,12 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req) * command was already aborted once before and still hasn't been * returned to the driver, or if this is the admin queue. */ + opcode = nvme_req(req)->cmd->common.opcode; if (!nvmeq->qid || iod->aborted) { dev_warn(dev->ctrl.device, - "I/O %d QID %d timeout, reset controller\n", - req->tag, nvmeq->qid); + "I/O tag %d (%04x) opcode %#x (%s) QID %d timeout, reset controller\n", + req->tag, nvme_cid(req), opcode, + nvme_opcode_str(nvmeq->qid, opcode, 0), nvmeq->qid); nvme_req(req)->flags |= NVME_REQ_CANCELLED; goto disable; } @@ -1362,10 +1365,10 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req) cmd.abort.sqid = cpu_to_le16(nvmeq->qid); dev_warn(nvmeq->dev->ctrl.device, - "I/O %d (%s) QID %d timeout, aborting\n", - req->tag, - nvme_get_opcode_str(nvme_req(req)->cmd->common.opcode), - nvmeq->qid); + "I/O tag %d (%04x) opcode %#x (%s) QID %d timeout, aborting req_op:%s(%u) size:%u\n", + req->tag, nvme_cid(req), opcode, nvme_get_opcode_str(opcode), + nvmeq->qid, blk_op_str(req_op(req)), req_op(req), + blk_rq_bytes(req)); abort_req = blk_mq_alloc_request(dev->ctrl.admin_q, nvme_req_op(&cmd), 
BLK_MQ_REQ_NOWAIT); @@ -2743,10 +2746,10 @@ static void nvme_reset_work(struct work_struct *work) * controller around but remove all namespaces. */ if (dev->online_queues > 1) { + nvme_dbbuf_set(dev); nvme_unquiesce_io_queues(&dev->ctrl); nvme_wait_freeze(&dev->ctrl); nvme_pci_update_nr_queues(dev); - nvme_dbbuf_set(dev); nvme_unfreeze(&dev->ctrl); } else { dev_warn(dev->ctrl.device, "IO queues lost\n"); @@ -3408,6 +3411,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, { PCI_DEVICE(0x1c5c, 0x174a), /* SK Hynix P31 SSD */ .driver_data = NVME_QUIRK_BOGUS_NID, }, + { PCI_DEVICE(0x1c5c, 0x1D59), /* SK Hynix BC901 */ + .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, { PCI_DEVICE(0x15b7, 0x2001), /* Sandisk Skyhawk */ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, { PCI_DEVICE(0x1d97, 0x2263), /* SPCC */ diff --git a/drivers/nvme/host/pr.c b/drivers/nvme/host/pr.c index 391b1465ebfd5e0067dfb698f8bd199a9fe3d148..fc3eed00f9ff1196189415ef1bccd0a6c1e02551 100644 --- a/drivers/nvme/host/pr.c +++ b/drivers/nvme/host/pr.c @@ -98,7 +98,7 @@ static int nvme_send_pr_command(struct block_device *bdev, struct nvme_command *c, void *data, unsigned int data_len) { if (IS_ENABLED(CONFIG_NVME_MULTIPATH) && - bdev->bd_disk->fops == &nvme_ns_head_ops) + nvme_disk_is_ns_head(bdev->bd_disk)) return nvme_send_ns_head_pr_command(bdev, c, data, data_len); return nvme_send_ns_pr_command(bdev->bd_disk->private_data, c, data, diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index c89503da24d7a8300ae21c22e3a58df8d382a668..11dde0d830442df31c74499655e86566ab995a66 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1946,9 +1946,14 @@ static enum blk_eh_timer_return nvme_rdma_timeout(struct request *rq) struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); struct nvme_rdma_queue *queue = req->queue; struct nvme_rdma_ctrl *ctrl = queue->ctrl; - - dev_warn(ctrl->ctrl.device, "I/O %d QID %d 
timeout\n", - rq->tag, nvme_rdma_queue_idx(queue)); + u8 opcode = req->req.cmd->common.opcode; + u8 fctype = req->req.cmd->fabrics.fctype; + int qid = nvme_rdma_queue_idx(queue); + + dev_warn(ctrl->ctrl.device, + "I/O tag %d (%04x) opcode %#x (%s) QID %d timeout\n", + rq->tag, nvme_cid(rq), opcode, + nvme_opcode_str(qid, opcode, fctype), qid); if (nvme_ctrl_state(&ctrl->ctrl) != NVME_CTRL_LIVE) { /* diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c index ac24ad102380600cef13428eb0b3e31c0e32fecc..754e911110420f5f30074762c7787a88b183830a 100644 --- a/drivers/nvme/host/sysfs.c +++ b/drivers/nvme/host/sysfs.c @@ -39,10 +39,9 @@ static inline struct nvme_ns_head *dev_to_ns_head(struct device *dev) { struct gendisk *disk = dev_to_disk(dev); - if (disk->fops == &nvme_bdev_ops) - return nvme_get_ns_from_dev(dev)->head; - else + if (nvme_disk_is_ns_head(disk)) return disk->private_data; + return nvme_get_ns_from_dev(dev)->head; } static ssize_t wwid_show(struct device *dev, struct device_attribute *attr, @@ -233,7 +232,8 @@ static umode_t nvme_ns_attrs_are_visible(struct kobject *kobj, } #ifdef CONFIG_NVME_MULTIPATH if (a == &dev_attr_ana_grpid.attr || a == &dev_attr_ana_state.attr) { - if (dev_to_disk(dev)->fops != &nvme_bdev_ops) /* per-path attr */ + /* per-path attr */ + if (nvme_disk_is_ns_head(dev_to_disk(dev))) return 0; if (!nvme_ctrl_use_ana(nvme_get_ns_from_dev(dev)->ctrl)) return 0; diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 08805f0278106483c10b2b9c787aa35c36e4dcbe..d058d990532bfcf6dd521cfa51f411f60f5913fd 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1922,14 +1922,13 @@ static int nvme_tcp_alloc_admin_queue(struct nvme_ctrl *ctrl) ctrl->opts->subsysnqn); if (!pskid) { dev_err(ctrl->device, "no valid PSK found\n"); - ret = -ENOKEY; - goto out_free_queue; + return -ENOKEY; } } ret = nvme_tcp_alloc_queue(ctrl, 0, pskid); if (ret) - goto out_free_queue; + return ret; ret = 
nvme_tcp_alloc_async_req(to_tcp_ctrl(ctrl)); if (ret) @@ -2433,9 +2432,9 @@ static enum blk_eh_timer_return nvme_tcp_timeout(struct request *rq) int qid = nvme_tcp_queue_id(req->queue); dev_warn(ctrl->device, - "queue %d: timeout cid %#x type %d opcode %#x (%s)\n", - nvme_tcp_queue_id(req->queue), nvme_cid(rq), pdu->hdr.type, - opc, nvme_opcode_str(qid, opc, fctype)); + "I/O tag %d (%04x) type %d opcode %#x (%s) QID %d timeout\n", + rq->tag, nvme_cid(rq), pdu->hdr.type, opc, + nvme_opcode_str(qid, opc, fctype), qid); if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE) { /* diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index bd59990b525016fb05945d4f65aa199aab61da43..bda7a3009e85127ca27f99e107d61fbf1f3995f2 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1031,7 +1031,7 @@ nvmet_fc_match_hostport(struct nvmet_fc_tgtport *tgtport, void *hosthandle) list_for_each_entry(host, &tgtport->host_list, host_list) { if (host->hosthandle == hosthandle && !host->invalid) { if (nvmet_fc_hostport_get(host)) - return (host); + return host; } } diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index c65a73433c05f643654616175d5fc9229af753e7..ead349af30f1e0c87ee0adde980aa98b5fdb0e8a 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -995,11 +995,6 @@ fcloop_nport_free(struct kref *ref) { struct fcloop_nport *nport = container_of(ref, struct fcloop_nport, ref); - unsigned long flags; - - spin_lock_irqsave(&fcloop_lock, flags); - list_del(&nport->nport_list); - spin_unlock_irqrestore(&fcloop_lock, flags); kfree(nport); } @@ -1357,6 +1352,8 @@ __unlink_remote_port(struct fcloop_nport *nport) nport->tport->remoteport = NULL; nport->rport = NULL; + list_del(&nport->nport_list); + return rport; } diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 4597bca43a6d87269f557dfa3b35d47da8031ff1..667f9c04f35d538bb361f733e50c62bb7c52d9c3 100644 --- a/drivers/nvme/target/rdma.c +++ 
b/drivers/nvme/target/rdma.c @@ -37,6 +37,8 @@ #define NVMET_RDMA_MAX_MDTS 8 #define NVMET_RDMA_MAX_METADATA_MDTS 5 +#define NVMET_RDMA_BACKLOG 128 + struct nvmet_rdma_srq; struct nvmet_rdma_cmd { @@ -1583,8 +1585,19 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, } if (queue->host_qid == 0) { - /* Let inflight controller teardown complete */ - flush_workqueue(nvmet_wq); + struct nvmet_rdma_queue *q; + int pending = 0; + + /* Check for pending controller teardown */ + mutex_lock(&nvmet_rdma_queue_mutex); + list_for_each_entry(q, &nvmet_rdma_queue_list, queue_list) { + if (q->nvme_sq.ctrl == queue->nvme_sq.ctrl && + q->state == NVMET_RDMA_Q_DISCONNECTING) + pending++; + } + mutex_unlock(&nvmet_rdma_queue_mutex); + if (pending > NVMET_RDMA_BACKLOG) + return NVME_SC_CONNECT_CTRL_BUSY; } ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn); @@ -1880,7 +1893,7 @@ static int nvmet_rdma_enable_port(struct nvmet_rdma_port *port) goto out_destroy_id; } - ret = rdma_listen(cm_id, 128); + ret = rdma_listen(cm_id, NVMET_RDMA_BACKLOG); if (ret) { pr_err("listening to %pISpcs failed (%d)\n", addr, ret); goto out_destroy_id; diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 4cc27856aa8fefc53d2a77044ea3a3ef927c8ba5..6a1e6bb80062d4753501e07cbcba43870fc00eeb 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -24,6 +24,8 @@ #include "nvmet.h" #define NVMET_TCP_DEF_INLINE_DATA_SIZE (4 * PAGE_SIZE) +#define NVMET_TCP_MAXH2CDATA 0x400000 /* 4M arbitrary limit */ +#define NVMET_TCP_BACKLOG 128 static int param_store_val(const char *str, int *val, int min, int max) { @@ -923,7 +925,7 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue) icresp->hdr.pdo = 0; icresp->hdr.plen = cpu_to_le32(icresp->hdr.hlen); icresp->pfv = cpu_to_le16(NVME_TCP_PFV_1_0); - icresp->maxdata = cpu_to_le32(0x400000); /* 16M arbitrary limit */ + icresp->maxdata = cpu_to_le32(NVMET_TCP_MAXH2CDATA); icresp->cpda = 0; if
(queue->hdr_digest) icresp->digest |= NVME_TCP_HDR_DIGEST_ENABLE; @@ -978,13 +980,13 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue) { struct nvme_tcp_data_pdu *data = &queue->pdu.data; struct nvmet_tcp_cmd *cmd; + unsigned int exp_data_len; if (likely(queue->nr_cmds)) { if (unlikely(data->ttag >= queue->nr_cmds)) { pr_err("queue %d: received out of bound ttag %u, nr_cmds %u\n", queue->idx, data->ttag, queue->nr_cmds); - nvmet_tcp_fatal_error(queue); - return -EPROTO; + goto err_proto; } cmd = &queue->cmds[data->ttag]; } else { @@ -995,19 +997,32 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue) pr_err("ttag %u unexpected data offset %u (expected %u)\n", data->ttag, le32_to_cpu(data->data_offset), cmd->rbytes_done); - /* FIXME: use path and transport errors */ - nvmet_req_complete(&cmd->req, - NVME_SC_INVALID_FIELD | NVME_SC_DNR); - return -EPROTO; + goto err_proto; } + exp_data_len = le32_to_cpu(data->hdr.plen) - + nvmet_tcp_hdgst_len(queue) - + nvmet_tcp_ddgst_len(queue) - + sizeof(*data); + cmd->pdu_len = le32_to_cpu(data->data_length); + if (unlikely(cmd->pdu_len != exp_data_len || + cmd->pdu_len == 0 || + cmd->pdu_len > NVMET_TCP_MAXH2CDATA)) { + pr_err("H2CData PDU len %u is invalid\n", cmd->pdu_len); + goto err_proto; + } cmd->pdu_recv = 0; nvmet_tcp_build_pdu_iovec(cmd); queue->cmd = cmd; queue->rcv_state = NVMET_TCP_RECV_DATA; return 0; + +err_proto: + /* FIXME: use proper transport errors */ + nvmet_tcp_fatal_error(queue); + return -EPROTO; } static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue) @@ -1768,7 +1783,7 @@ static int nvmet_tcp_try_peek_pdu(struct nvmet_tcp_queue *queue) (int)sizeof(struct nvme_tcp_icreq_pdu)); if (hdr->type == nvme_tcp_icreq && hdr->hlen == sizeof(struct nvme_tcp_icreq_pdu) && - hdr->plen == (__le32)sizeof(struct nvme_tcp_icreq_pdu)) { + hdr->plen == cpu_to_le32(sizeof(struct nvme_tcp_icreq_pdu))) { pr_debug("queue %d: icreq detected\n", queue->idx); return len; @@ 
-2053,7 +2068,7 @@ static int nvmet_tcp_add_port(struct nvmet_port *nport) goto err_sock; } - ret = kernel_listen(port->sock, 128); + ret = kernel_listen(port->sock, NVMET_TCP_BACKLOG); if (ret) { pr_err("failed to listen %d on port sock\n", ret); goto err_sock; @@ -2119,8 +2134,19 @@ static u16 nvmet_tcp_install_queue(struct nvmet_sq *sq) container_of(sq, struct nvmet_tcp_queue, nvme_sq); if (sq->qid == 0) { - /* Let inflight controller teardown complete */ - flush_workqueue(nvmet_wq); + struct nvmet_tcp_queue *q; + int pending = 0; + + /* Check for pending controller teardown */ + mutex_lock(&nvmet_tcp_queue_mutex); + list_for_each_entry(q, &nvmet_tcp_queue_list, queue_list) { + if (q->nvme_sq.ctrl == sq->ctrl && + q->state == NVMET_TCP_Q_DISCONNECTING) + pending++; + } + mutex_unlock(&nvmet_tcp_queue_mutex); + if (pending > NVMET_TCP_BACKLOG) + return NVME_SC_CONNECT_CTRL_BUSY; } queue->nr_cmds = sq->size * 2; diff --git a/drivers/nvme/target/trace.c b/drivers/nvme/target/trace.c index bff454d46255b42162667b12a193dc8b7205469a..6ee1f3db81d04071e761b39640e573c9770aa32f 100644 --- a/drivers/nvme/target/trace.c +++ b/drivers/nvme/target/trace.c @@ -211,7 +211,7 @@ const char *nvmet_trace_disk_name(struct trace_seq *p, char *name) return ret; } -const char *nvmet_trace_ctrl_name(struct trace_seq *p, struct nvmet_ctrl *ctrl) +const char *nvmet_trace_ctrl_id(struct trace_seq *p, u16 ctrl_id) { const char *ret = trace_seq_buffer_ptr(p); @@ -224,8 +224,8 @@ const char *nvmet_trace_ctrl_name(struct trace_seq *p, struct nvmet_ctrl *ctrl) * If we can know the extra data of the connect command in this stage, * we can update this print statement later. 
*/ - if (ctrl) - trace_seq_printf(p, "%d", ctrl->cntlid); + if (ctrl_id) + trace_seq_printf(p, "%d", ctrl_id); else trace_seq_printf(p, "_"); trace_seq_putc(p, 0); diff --git a/drivers/nvme/target/trace.h b/drivers/nvme/target/trace.h index 6109b3806b12be7dae3d429c083d1fa49ba92c05..7f7ebf9558e505fe83b5ea1d98f52dd9cd3d2dca 100644 --- a/drivers/nvme/target/trace.h +++ b/drivers/nvme/target/trace.h @@ -32,18 +32,24 @@ const char *nvmet_trace_parse_fabrics_cmd(struct trace_seq *p, u8 fctype, nvmet_trace_parse_nvm_cmd(p, opcode, cdw10) : \ nvmet_trace_parse_admin_cmd(p, opcode, cdw10))) -const char *nvmet_trace_ctrl_name(struct trace_seq *p, struct nvmet_ctrl *ctrl); -#define __print_ctrl_name(ctrl) \ - nvmet_trace_ctrl_name(p, ctrl) +const char *nvmet_trace_ctrl_id(struct trace_seq *p, u16 ctrl_id); +#define __print_ctrl_id(ctrl_id) \ + nvmet_trace_ctrl_id(p, ctrl_id) const char *nvmet_trace_disk_name(struct trace_seq *p, char *name); #define __print_disk_name(name) \ nvmet_trace_disk_name(p, name) #ifndef TRACE_HEADER_MULTI_READ -static inline struct nvmet_ctrl *nvmet_req_to_ctrl(struct nvmet_req *req) +static inline u16 nvmet_req_to_ctrl_id(struct nvmet_req *req) { - return req->sq->ctrl; + /* + * The queue and controller pointers are not valid until an association + * has been established. 
+ */ + if (!req->sq || !req->sq->ctrl) + return 0; + return req->sq->ctrl->cntlid; } static inline void __assign_req_name(char *name, struct nvmet_req *req) @@ -53,8 +59,7 @@ static inline void __assign_req_name(char *name, struct nvmet_req *req) return; } - strncpy(name, req->ns->device_path, - min_t(size_t, DISK_NAME_LEN, strlen(req->ns->device_path))); + strscpy_pad(name, req->ns->device_path, DISK_NAME_LEN); } #endif @@ -63,7 +68,7 @@ TRACE_EVENT(nvmet_req_init, TP_ARGS(req, cmd), TP_STRUCT__entry( __field(struct nvme_command *, cmd) - __field(struct nvmet_ctrl *, ctrl) + __field(u16, ctrl_id) __array(char, disk, DISK_NAME_LEN) __field(int, qid) __field(u16, cid) @@ -76,7 +81,7 @@ TRACE_EVENT(nvmet_req_init, ), TP_fast_assign( __entry->cmd = cmd; - __entry->ctrl = nvmet_req_to_ctrl(req); + __entry->ctrl_id = nvmet_req_to_ctrl_id(req); __assign_req_name(__entry->disk, req); __entry->qid = req->sq->qid; __entry->cid = cmd->common.command_id; @@ -85,12 +90,12 @@ TRACE_EVENT(nvmet_req_init, __entry->flags = cmd->common.flags; __entry->nsid = le32_to_cpu(cmd->common.nsid); __entry->metadata = le64_to_cpu(cmd->common.metadata); - memcpy(__entry->cdw10, &cmd->common.cdw10, + memcpy(__entry->cdw10, &cmd->common.cdws, sizeof(__entry->cdw10)); ), TP_printk("nvmet%s: %sqid=%d, cmdid=%u, nsid=%u, flags=%#x, " "meta=%#llx, cmd=(%s, %s)", - __print_ctrl_name(__entry->ctrl), + __print_ctrl_id(__entry->ctrl_id), __print_disk_name(__entry->disk), __entry->qid, __entry->cid, __entry->nsid, __entry->flags, __entry->metadata, @@ -104,7 +109,7 @@ TRACE_EVENT(nvmet_req_complete, TP_PROTO(struct nvmet_req *req), TP_ARGS(req), TP_STRUCT__entry( - __field(struct nvmet_ctrl *, ctrl) + __field(u16, ctrl_id) __array(char, disk, DISK_NAME_LEN) __field(int, qid) __field(int, cid) @@ -112,7 +117,7 @@ TRACE_EVENT(nvmet_req_complete, __field(u16, status) ), TP_fast_assign( - __entry->ctrl = nvmet_req_to_ctrl(req); + __entry->ctrl_id = nvmet_req_to_ctrl_id(req); __entry->qid = req->cq->qid; 
__entry->cid = req->cqe->command_id; __entry->result = le64_to_cpu(req->cqe->result.u64); @@ -120,7 +125,7 @@ TRACE_EVENT(nvmet_req_complete, __assign_req_name(__entry->disk, req); ), TP_printk("nvmet%s: %sqid=%d, cmdid=%u, res=%#llx, status=%#x", - __print_ctrl_name(__entry->ctrl), + __print_ctrl_id(__entry->ctrl_id), __print_disk_name(__entry->disk), __entry->qid, __entry->cid, __entry->result, __entry->status) diff --git a/drivers/of/device.c b/drivers/of/device.c index 6e9572c4af83b9b0dbe06c2945ca380f274d4445..de89f99063758a84d30cf2a8346925feddcc84ca 100644 --- a/drivers/of/device.c +++ b/drivers/of/device.c @@ -93,12 +93,12 @@ of_dma_set_restricted_buffer(struct device *dev, struct device_node *np) int of_dma_configure_id(struct device *dev, struct device_node *np, bool force_dma, const u32 *id) { - const struct iommu_ops *iommu; const struct bus_dma_region *map = NULL; struct device_node *bus_np; u64 dma_start = 0; u64 mask, end, size = 0; bool coherent; + int iommu_ret; int ret; if (np == dev->of_node) @@ -181,21 +181,29 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, dev_dbg(dev, "device is%sdma coherent\n", coherent ? " " : " not "); - iommu = of_iommu_configure(dev, np, id); - if (PTR_ERR(iommu) == -EPROBE_DEFER) { + iommu_ret = of_iommu_configure(dev, np, id); + if (iommu_ret == -EPROBE_DEFER) { /* Don't touch range map if it wasn't set from a valid dma-ranges */ if (!ret) dev->dma_range_map = NULL; kfree(map); return -EPROBE_DEFER; - } + } else if (iommu_ret == -ENODEV) { + dev_dbg(dev, "device is not behind an iommu\n"); + } else if (iommu_ret) { + dev_err(dev, "iommu configuration for device failed with %pe\n", + ERR_PTR(iommu_ret)); - dev_dbg(dev, "device is%sbehind an iommu\n", - iommu ? 
" " : " not "); + /* + * Historically this routine doesn't fail driver probing + * due to errors in of_iommu_configure() + */ + } else + dev_dbg(dev, "device is behind an iommu\n"); - arch_setup_dma_ops(dev, dma_start, size, iommu, coherent); + arch_setup_dma_ops(dev, dma_start, size, coherent); - if (!iommu) + if (iommu_ret) of_dma_set_restricted_buffer(dev, np); return 0; diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 43e11c9502d1bab04a36130a744ea9c3174b4d15..d8f11a078924c1336326456b0e3f37f7b0e66df9 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -6295,6 +6295,41 @@ int pcie_set_mps(struct pci_dev *dev, int mps) } EXPORT_SYMBOL(pcie_set_mps); +static enum pci_bus_speed to_pcie_link_speed(u16 lnksta) +{ + return pcie_link_speed[FIELD_GET(PCI_EXP_LNKSTA_CLS, lnksta)]; +} + +int pcie_link_speed_mbps(struct pci_dev *pdev) +{ + u16 lnksta; + int err; + + err = pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnksta); + if (err) + return err; + + switch (to_pcie_link_speed(lnksta)) { + case PCIE_SPEED_2_5GT: + return 2500; + case PCIE_SPEED_5_0GT: + return 5000; + case PCIE_SPEED_8_0GT: + return 8000; + case PCIE_SPEED_16_0GT: + return 16000; + case PCIE_SPEED_32_0GT: + return 32000; + case PCIE_SPEED_64_0GT: + return 64000; + default: + break; + } + + return -EINVAL; +} +EXPORT_SYMBOL(pcie_link_speed_mbps); + /** * pcie_bandwidth_available - determine minimum link settings of a PCIe * device and its bandwidth limitation @@ -6328,8 +6363,7 @@ u32 pcie_bandwidth_available(struct pci_dev *dev, struct pci_dev **limiting_dev, while (dev) { pcie_capability_read_word(dev, PCI_EXP_LNKSTA, &lnksta); - next_speed = pcie_link_speed[FIELD_GET(PCI_EXP_LNKSTA_CLS, - lnksta)]; + next_speed = to_pcie_link_speed(lnksta); next_width = FIELD_GET(PCI_EXP_LNKSTA_NLW, lnksta); next_bw = next_width * PCIE_SPEED2MBS_ENC(next_speed); diff --git a/drivers/phy/mediatek/phy-mtk-tphy.c b/drivers/phy/mediatek/phy-mtk-tphy.c index 
05eab9014132fc9ff782e855e2c2351e8cedb64d..a4746f6cb8a187e2a1172fd5fc2dd1f7ceee7bb1 100644 --- a/drivers/phy/mediatek/phy-mtk-tphy.c +++ b/drivers/phy/mediatek/phy-mtk-tphy.c @@ -185,6 +185,10 @@ #define P3D_RG_CDR_BIR_LTD1 GENMASK(28, 24) #define P3D_RG_CDR_BIR_LTD0 GENMASK(12, 8) +#define U3P_U3_PHYD_TOP1 0x100 +#define P3D_RG_PHY_MODE GENMASK(2, 1) +#define P3D_RG_FORCE_PHY_MODE BIT(0) + #define U3P_U3_PHYD_RXDET1 0x128 #define P3D_RG_RXDET_STB2_SET GENMASK(17, 9) @@ -327,6 +331,7 @@ struct mtk_phy_instance { int discth; int pre_emphasis; bool bc12_en; + bool type_force_mode; }; struct mtk_tphy { @@ -768,6 +773,23 @@ static void u3_phy_instance_init(struct mtk_tphy *tphy, void __iomem *phya = u3_banks->phya; void __iomem *phyd = u3_banks->phyd; + if (instance->type_force_mode) { + /* force phy as usb mode, default is pcie rc mode */ + mtk_phy_update_field(phyd + U3P_U3_PHYD_TOP1, P3D_RG_PHY_MODE, 1); + mtk_phy_set_bits(phyd + U3P_U3_PHYD_TOP1, P3D_RG_FORCE_PHY_MODE); + /* power down phy by ip and pipe reset */ + mtk_phy_set_bits(u3_banks->chip + U3P_U3_CHIP_GPIO_CTLD, + P3C_FORCE_IP_SW_RST | P3C_MCU_BUS_CK_GATE_EN); + mtk_phy_set_bits(u3_banks->chip + U3P_U3_CHIP_GPIO_CTLE, + P3C_RG_SWRST_U3_PHYD | P3C_RG_SWRST_U3_PHYD_FORCE_EN); + udelay(10); + /* power on phy again */ + mtk_phy_clear_bits(u3_banks->chip + U3P_U3_CHIP_GPIO_CTLD, + P3C_FORCE_IP_SW_RST | P3C_MCU_BUS_CK_GATE_EN); + mtk_phy_clear_bits(u3_banks->chip + U3P_U3_CHIP_GPIO_CTLE, + P3C_RG_SWRST_U3_PHYD | P3C_RG_SWRST_U3_PHYD_FORCE_EN); + } + /* gating PCIe Analog XTAL clock */ mtk_phy_set_bits(u3_banks->spllc + U3P_SPLLC_XTALCTL3, XC3_RG_U3_XTAL_RX_PWD | XC3_RG_U3_FRC_XTAL_RX_PWD); @@ -1120,6 +1142,9 @@ static void phy_parse_property(struct mtk_tphy *tphy, { struct device *dev = &instance->phy->dev; + if (instance->type == PHY_TYPE_USB3) + instance->type_force_mode = device_property_read_bool(dev, "mediatek,force-mode"); + if (instance->type != PHY_TYPE_USB2) return; diff --git 
a/drivers/phy/phy-can-transceiver.c b/drivers/phy/phy-can-transceiver.c index 840b7f8a31c5fd743af6611673fed4072c99fbd8..ee4ce42496985d28baf9a7e3172dd902cc7254f9 100644 --- a/drivers/phy/phy-can-transceiver.c +++ b/drivers/phy/phy-can-transceiver.c @@ -6,11 +6,11 @@ * */ #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include struct can_transceiver_data { diff --git a/drivers/phy/phy-core.c b/drivers/phy/phy-core.c index 96a0b1e111f34997f664d63151038cf582c2fb67..d9be6a4d538387fac816d4d46c781ebd84cc29de 100644 --- a/drivers/phy/phy-core.c +++ b/drivers/phy/phy-core.c @@ -959,7 +959,7 @@ struct phy *phy_create(struct device *dev, struct device_node *node, if (!phy) return ERR_PTR(-ENOMEM); - id = ida_simple_get(&phy_ida, 0, 0, GFP_KERNEL); + id = ida_alloc(&phy_ida, GFP_KERNEL); if (id < 0) { dev_err(dev, "unable to get id\n"); ret = id; @@ -1232,7 +1232,7 @@ static void phy_release(struct device *dev) dev_vdbg(dev, "releasing '%s'\n", dev_name(dev)); debugfs_remove_recursive(phy->debugfs); regulator_put(phy->pwr); - ida_simple_remove(&phy_ida, phy->id); + ida_free(&phy_ida, phy->id); kfree(phy); } diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index f6c727249104fdd3dfc016ec908d792f3dd65316..1ad10110dd2544b77ae38a1459497ae6e2905b84 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -1203,6 +1203,127 @@ static const struct qmp_phy_init_tbl sc8280xp_usb43dp_pcs_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V5_PCS_USB3_RXEQTRAINING_DFE_TIME_S2, 0x07), }; +static const struct qmp_phy_init_tbl x1e80100_usb43dp_serdes_tbl[] = { + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_EN_CENTER, 0x01), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_PER1, 0x62), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_PER2, 0x02), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_STEP_SIZE1_MODE0, 0xc2), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_STEP_SIZE2_MODE0, 0x03), + 
QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_STEP_SIZE1_MODE1, 0xc2), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_STEP_SIZE2_MODE1, 0x03), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SYSCLK_BUF_ENABLE, 0x0a), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CP_CTRL_MODE0, 0x02), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CP_CTRL_MODE1, 0x02), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_RCTRL_MODE0, 0x16), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_RCTRL_MODE1, 0x16), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_CCTRL_MODE0, 0x36), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_CCTRL_MODE1, 0x36), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SYSCLK_EN_SEL, 0x1a), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP_EN, 0x04), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP_CFG, 0x04), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP1_MODE0, 0x08), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP2_MODE0, 0x1a), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP1_MODE1, 0x16), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP2_MODE1, 0x41), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DEC_START_MODE0, 0x82), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DEC_START_MSB_MODE0, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DEC_START_MODE1, 0x82), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DEC_START_MSB_MODE1, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START1_MODE0, 0x55), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START2_MODE0, 0x55), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START3_MODE0, 0x03), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START1_MODE1, 0x55), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START2_MODE1, 0x55), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START3_MODE1, 0x03), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_VCO_TUNE_MAP, 0x14), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_VCO_TUNE1_MODE0, 0xba), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_VCO_TUNE2_MODE0, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_VCO_TUNE1_MODE1, 0xba), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_VCO_TUNE2_MODE1, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_HSCLK_SEL_1, 0x13), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_HSCLK_HS_SWITCH_SEL_1, 0x00), + 
QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_CORE_CLK_DIV_MODE0, 0x0a), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CORECLK_DIV_MODE1, 0x04), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CORE_CLK_EN, 0xa0), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CMN_CONFIG_1, 0x76), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_IVCO, 0x0f), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_IVCO_MODE1, 0x0f), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_INTEGLOOP_GAIN0_MODE0, 0x20), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_INTEGLOOP_GAIN0_MODE1, 0x20), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_VCO_TUNE_INITVAL2, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_VCO_TUNE_MAXVAL2, 0x01), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SVS_MODE_CLK_SEL, 0x0a), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_BG_TIMER, 0x0a), +}; + +static const struct qmp_phy_init_tbl x1e80100_usb43dp_tx_tbl[] = { + QMP_PHY_INIT_CFG(QSERDES_V6_N4_TX_LANE_MODE_1, 0x05), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_TX_LANE_MODE_2, 0x50), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_TX_LANE_MODE_3, 0x50), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_TX_RES_CODE_LANE_OFFSET_TX, 0x1f), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_TX_RES_CODE_LANE_OFFSET_RX, 0x0a), +}; + +static const struct qmp_phy_init_tbl x1e80100_usb43dp_rx_tbl[] = { + QMP_PHY_INIT_CFG(QSERDES_V6_N4_RX_SIGDET_CNTRL, 0x04), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_RX_SIGDET_DEGLITCH_CNTRL, 0x0e), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_RX_SIGDET_ENABLES, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_RX_MODE_RATE_0_1_B0, 0xc3), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_RX_MODE_RATE_0_1_B1, 0xc3), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_RX_MODE_RATE_0_1_B2, 0xd8), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_RX_MODE_RATE_0_1_B3, 0x9e), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_RX_MODE_RATE_0_1_B4, 0x36), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_RX_MODE_RATE_0_1_B5, 0xb6), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_RX_MODE_RATE_0_1_B6, 0x64), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_RX_MODE_RATE2_B0, 0xd6), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_RX_MODE_RATE2_B1, 0xee), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_RX_MODE_RATE2_B2, 0x18), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_RX_MODE_RATE2_B3, 0x9a), + 
QMP_PHY_INIT_CFG(QSERDES_V6_N4_RX_MODE_RATE2_B4, 0x04), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_RX_MODE_RATE2_B5, 0x36), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_RX_MODE_RATE2_B6, 0xe3), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_RX_IVCM_CAL_CODE_OVERRIDE, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_RX_RX_IVCM_CAL_CTRL2, 0x80), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_RX_RX_SUMMER_CAL_SPD_MODE, 0x2f), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_RX_DFE_CTLE_POST_CAL_OFFSET, 0x08), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_RX_UCDR_PI_CONTROLS, 0x15), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_RX_UCDR_PI_CTRL1, 0xd0), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_RX_UCDR_PI_CTRL2, 0x48), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_RX_UCDR_SB2_GAIN2_RATE2, 0x0a), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_RX_RX_IVCM_POSTCAL_OFFSET, 0x7c), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_RX_VGA_CAL_CNTRL1, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_RX_VGA_CAL_MAN_VAL, 0x04), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_RX_DFE_DAC_ENABLE1, 0x88), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_RX_DFE_3, 0x45), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_RX_GM_CAL, 0x0d), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_RX_UCDR_FO_GAIN_RATE2, 0x09), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_RX_UCDR_SO_GAIN_RATE2, 0x05), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_RX_Q_PI_INTRINSIC_BIAS_RATE32, 0x2f), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_RX_RX_BKUP_CTRL1, 0x14), +}; + +static const struct qmp_phy_init_tbl x1e80100_usb43dp_pcs_tbl[] = { + QMP_PHY_INIT_CFG(QPHY_V6_PCS_RCVR_DTCT_DLY_P1U2_L, 0xe7), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_RCVR_DTCT_DLY_P1U2_H, 0x03), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_LOCK_DETECT_CONFIG1, 0xc4), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_LOCK_DETECT_CONFIG2, 0x89), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_LOCK_DETECT_CONFIG3, 0x20), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_LOCK_DETECT_CONFIG6, 0x13), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_REFGEN_REQ_CONFIG1, 0x21), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_RX_SIGDET_LVL, 0x55), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_CDR_RESET_TIME, 0x0a), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_ALIGN_DETECT_CONFIG1, 0xd4), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_ALIGN_DETECT_CONFIG2, 0x30), + 
QMP_PHY_INIT_CFG(QPHY_V6_PCS_PCS_TX_RX_CONFIG, 0x0c), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_EQ_CONFIG1, 0x4b), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_EQ_CONFIG5, 0x10), +}; + +static const struct qmp_phy_init_tbl x1e80100_usb43dp_pcs_usb_tbl[] = { + QMP_PHY_INIT_CFG(QPHY_V6_PCS_USB3_LFPS_DET_HIGH_COUNT_VAL, 0xf8), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_USB3_RXEQTRAINING_DFE_TIME_S2, 0x07), +}; + /* list of regulators */ struct qmp_regulator_data { const char *name; @@ -1682,6 +1803,51 @@ static const struct qmp_phy_cfg sc8280xp_usb43dpphy_cfg = { .regs = qmp_v5_5nm_usb3phy_regs_layout, }; +static const struct qmp_phy_cfg x1e80100_usb3dpphy_cfg = { + .offsets = &qmp_combo_offsets_v5, + + .serdes_tbl = x1e80100_usb43dp_serdes_tbl, + .serdes_tbl_num = ARRAY_SIZE(x1e80100_usb43dp_serdes_tbl), + .tx_tbl = x1e80100_usb43dp_tx_tbl, + .tx_tbl_num = ARRAY_SIZE(x1e80100_usb43dp_tx_tbl), + .rx_tbl = x1e80100_usb43dp_rx_tbl, + .rx_tbl_num = ARRAY_SIZE(x1e80100_usb43dp_rx_tbl), + .pcs_tbl = x1e80100_usb43dp_pcs_tbl, + .pcs_tbl_num = ARRAY_SIZE(x1e80100_usb43dp_pcs_tbl), + .pcs_usb_tbl = x1e80100_usb43dp_pcs_usb_tbl, + .pcs_usb_tbl_num = ARRAY_SIZE(x1e80100_usb43dp_pcs_usb_tbl), + + .dp_serdes_tbl = qmp_v6_dp_serdes_tbl, + .dp_serdes_tbl_num = ARRAY_SIZE(qmp_v6_dp_serdes_tbl), + .dp_tx_tbl = qmp_v6_dp_tx_tbl, + .dp_tx_tbl_num = ARRAY_SIZE(qmp_v6_dp_tx_tbl), + + .serdes_tbl_rbr = qmp_v6_dp_serdes_tbl_rbr, + .serdes_tbl_rbr_num = ARRAY_SIZE(qmp_v6_dp_serdes_tbl_rbr), + .serdes_tbl_hbr = qmp_v6_dp_serdes_tbl_hbr, + .serdes_tbl_hbr_num = ARRAY_SIZE(qmp_v6_dp_serdes_tbl_hbr), + .serdes_tbl_hbr2 = qmp_v6_dp_serdes_tbl_hbr2, + .serdes_tbl_hbr2_num = ARRAY_SIZE(qmp_v6_dp_serdes_tbl_hbr2), + .serdes_tbl_hbr3 = qmp_v6_dp_serdes_tbl_hbr3, + .serdes_tbl_hbr3_num = ARRAY_SIZE(qmp_v6_dp_serdes_tbl_hbr3), + + .swing_hbr_rbr = &qmp_dp_v5_voltage_swing_hbr_rbr, + .pre_emphasis_hbr_rbr = &qmp_dp_v5_pre_emphasis_hbr_rbr, + .swing_hbr3_hbr2 = &qmp_dp_v5_voltage_swing_hbr3_hbr2, + .pre_emphasis_hbr3_hbr2 = 
&qmp_dp_v5_pre_emphasis_hbr3_hbr2, + + .dp_aux_init = qmp_v4_dp_aux_init, + .configure_dp_tx = qmp_v4_configure_dp_tx, + .configure_dp_phy = qmp_v4_configure_dp_phy, + .calibrate_dp_phy = qmp_v4_calibrate_dp_phy, + + .reset_list = msm8996_usb3phy_reset_l, + .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), + .vreg_list = qmp_phy_vreg_l, + .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), + .regs = qmp_v45_usb3phy_regs_layout, +}; + static const struct qmp_phy_cfg sm6350_usb3dpphy_cfg = { .offsets = &qmp_combo_offsets_v3, @@ -3518,6 +3684,14 @@ static const struct of_device_id qmp_combo_of_match_table[] = { .compatible = "qcom,sm8550-qmp-usb3-dp-phy", .data = &sm8550_usb3dpphy_cfg, }, + { + .compatible = "qcom,sm8650-qmp-usb3-dp-phy", + .data = &sm8550_usb3dpphy_cfg, + }, + { + .compatible = "qcom,x1e80100-qmp-usb3-dp-phy", + .data = &x1e80100_usb3dpphy_cfg, + }, { } }; MODULE_DEVICE_TABLE(of, qmp_combo_of_match_table); diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index b64598ac59f4d9d928105a17af0186b0038e1c55..2af7115ef96891ea33443bbcbf823c3d3c03fafd 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -1909,6 +1909,35 @@ static const struct qmp_phy_init_tbl sm8550_qmp_gen4x2_pcie_pcs_misc_tbl[] = { QMP_PHY_INIT_CFG(QPHY_PCIE_V6_20_PCS_G4_FOM_EQ_CONFIG5, 0xf2), }; +static const struct qmp_phy_init_tbl sm8650_qmp_gen4x2_pcie_rx_tbl[] = { + QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_UCDR_FO_GAIN_RATE_2, 0x0a), + QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_UCDR_FO_GAIN_RATE_3, 0x0a), + QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_UCDR_PI_CONTROLS, 0x16), + QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_UCDR_SO_ACC_DEFAULT_VAL_RATE3, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_IVCM_CAL_CTRL2, 0x82), + QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_DFE_3, 0x05), + QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_VGA_CAL_MAN_VAL, 0x0a), + QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_GM_CAL, 0x0d), + QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_EQU_ADAPTOR_CNTRL4, 0x0b), 
+ QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_SIGDET_ENABLES, 0x1c), + QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_PHPRE_CTRL, 0x20), + QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_DFE_CTLE_POST_CAL_OFFSET, 0x38), + QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_MODE_RATE2_B0, 0xd3), + QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_MODE_RATE2_B1, 0xd3), + QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_MODE_RATE2_B2, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_MODE_RATE2_B3, 0x9a), + QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_MODE_RATE2_B4, 0x06), + QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_MODE_RATE2_B5, 0xb6), + QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_MODE_RATE2_B6, 0xee), + QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_MODE_RATE3_B0, 0x23), + QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_MODE_RATE3_B1, 0x9b), + QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_MODE_RATE3_B2, 0x60), + QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_MODE_RATE3_B3, 0xdf), + QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_MODE_RATE3_B4, 0x43), + QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_MODE_RATE3_B5, 0x76), + QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_MODE_RATE3_B6, 0xff), +}; + static const struct qmp_phy_init_tbl sa8775p_qmp_gen4x2_pcie_serdes_alt_tbl[] = { QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIAS_EN_CLKBUFLR_EN, 0x14), QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_IVCO, 0x0f), @@ -3047,6 +3076,36 @@ static const struct qmp_phy_cfg sm8550_qmp_gen4x2_pciephy_cfg = { .has_nocsr_reset = true, }; +static const struct qmp_phy_cfg sm8650_qmp_gen4x2_pciephy_cfg = { + .lanes = 2, + + .offsets = &qmp_pcie_offsets_v6_20, + + .tbls = { + .serdes = sm8550_qmp_gen4x2_pcie_serdes_tbl, + .serdes_num = ARRAY_SIZE(sm8550_qmp_gen4x2_pcie_serdes_tbl), + .tx = sm8550_qmp_gen4x2_pcie_tx_tbl, + .tx_num = ARRAY_SIZE(sm8550_qmp_gen4x2_pcie_tx_tbl), + .rx = sm8650_qmp_gen4x2_pcie_rx_tbl, + .rx_num = ARRAY_SIZE(sm8650_qmp_gen4x2_pcie_rx_tbl), + .pcs = sm8550_qmp_gen4x2_pcie_pcs_tbl, + .pcs_num = ARRAY_SIZE(sm8550_qmp_gen4x2_pcie_pcs_tbl), + .pcs_misc = sm8550_qmp_gen4x2_pcie_pcs_misc_tbl, + .pcs_misc_num = ARRAY_SIZE(sm8550_qmp_gen4x2_pcie_pcs_misc_tbl), + .ln_shrd = 
sm8550_qmp_gen4x2_pcie_ln_shrd_tbl, + .ln_shrd_num = ARRAY_SIZE(sm8550_qmp_gen4x2_pcie_ln_shrd_tbl), + }, + .reset_list = sdm845_pciephy_reset_l, + .num_resets = ARRAY_SIZE(sdm845_pciephy_reset_l), + .vreg_list = sm8550_qmp_phy_vreg_l, + .num_vregs = ARRAY_SIZE(sm8550_qmp_phy_vreg_l), + .regs = pciephy_v5_regs_layout, + + .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, + .phy_status = PHYSTATUS_4_20, + .has_nocsr_reset = true, +}; + static const struct qmp_phy_cfg sa8775p_qmp_gen4x2_pciephy_cfg = { .lanes = 2, .offsets = &qmp_pcie_offsets_v5_20, @@ -3820,6 +3879,12 @@ static const struct of_device_id qmp_pcie_of_match_table[] = { }, { .compatible = "qcom,sm8550-qmp-gen4x2-pcie-phy", .data = &sm8550_qmp_gen4x2_pciephy_cfg, + }, { + .compatible = "qcom,sm8650-qmp-gen3x2-pcie-phy", + .data = &sm8550_qmp_gen3x2_pciephy_cfg, + }, { + .compatible = "qcom,sm8650-qmp-gen4x2-pcie-phy", + .data = &sm8650_qmp_gen4x2_pciephy_cfg, }, { }, }; diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-ufs-v6.h b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-ufs-v6.h index c23d5e41e25b552a7b7e64a48587cdaf162c678f..fe6c450f612382b281ccceab453a086373a27ab0 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-ufs-v6.h +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-ufs-v6.h @@ -12,6 +12,7 @@ #define QPHY_V6_PCS_UFS_SW_RESET 0x008 #define QPHY_V6_PCS_UFS_TIMER_20US_CORECLK_STEPS_MSB 0x00c #define QPHY_V6_PCS_UFS_TIMER_20US_CORECLK_STEPS_LSB 0x010 +#define QPHY_V6_PCS_UFS_PCS_CTRL1 0x020 #define QPHY_V6_PCS_UFS_PLL_CNTL 0x02c #define QPHY_V6_PCS_UFS_TX_LARGE_AMP_DRV_LVL 0x030 #define QPHY_V6_PCS_UFS_TX_SMALL_AMP_DRV_LVL 0x038 diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-usb-v7.h b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-usb-v7.h new file mode 100644 index 0000000000000000000000000000000000000000..24368d45ae7641b1282af1ec93d5c285ec1198bc --- /dev/null +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-usb-v7.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2023, Linaro Limited + */ + 
+#ifndef QCOM_PHY_QMP_PCS_USB_V7_H_ +#define QCOM_PHY_QMP_PCS_USB_V7_H_ + +#define QPHY_V7_PCS_USB3_POWER_STATE_CONFIG1 0x00 +#define QPHY_V7_PCS_USB3_AUTONOMOUS_MODE_CTRL 0x08 +#define QPHY_V7_PCS_USB3_LFPS_RXTERM_IRQ_CLEAR 0x14 +#define QPHY_V7_PCS_USB3_LFPS_DET_HIGH_COUNT_VAL 0x18 +#define QPHY_V7_PCS_USB3_RXEQTRAINING_DFE_TIME_S2 0x3c +#define QPHY_V7_PCS_USB3_RCVR_DTCT_DLY_U3_L 0x40 +#define QPHY_V7_PCS_USB3_RCVR_DTCT_DLY_U3_H 0x44 + +#endif diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-v7.h b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-v7.h new file mode 100644 index 0000000000000000000000000000000000000000..c7759892ed2ea046b372ffac23c3ab75c8015a2b --- /dev/null +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-v7.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2023, Linaro Limited + */ + +#ifndef QCOM_PHY_QMP_PCS_V7_H_ +#define QCOM_PHY_QMP_PCS_V7_H_ + +/* Only for QMP V7 PHY - USB/PCIe PCS registers */ +#define QPHY_V7_PCS_SW_RESET 0x000 +#define QPHY_V7_PCS_PCS_STATUS1 0x014 +#define QPHY_V7_PCS_POWER_DOWN_CONTROL 0x040 +#define QPHY_V7_PCS_START_CONTROL 0x044 +#define QPHY_V7_PCS_POWER_STATE_CONFIG1 0x090 +#define QPHY_V7_PCS_LOCK_DETECT_CONFIG1 0x0c4 +#define QPHY_V7_PCS_LOCK_DETECT_CONFIG2 0x0c8 +#define QPHY_V7_PCS_LOCK_DETECT_CONFIG3 0x0cc +#define QPHY_V7_PCS_LOCK_DETECT_CONFIG6 0x0d8 +#define QPHY_V7_PCS_REFGEN_REQ_CONFIG1 0x0dc +#define QPHY_V7_PCS_RX_SIGDET_LVL 0x188 +#define QPHY_V7_PCS_RCVR_DTCT_DLY_P1U2_L 0x190 +#define QPHY_V7_PCS_RCVR_DTCT_DLY_P1U2_H 0x194 +#define QPHY_V7_PCS_RATE_SLEW_CNTRL1 0x198 +#define QPHY_V7_PCS_CDR_RESET_TIME 0x1b0 +#define QPHY_V7_PCS_ALIGN_DETECT_CONFIG1 0x1c0 +#define QPHY_V7_PCS_ALIGN_DETECT_CONFIG2 0x1c4 +#define QPHY_V7_PCS_PCS_TX_RX_CONFIG 0x1d0 +#define QPHY_V7_PCS_EQ_CONFIG1 0x1dc +#define QPHY_V7_PCS_EQ_CONFIG2 0x1e0 +#define QPHY_V7_PCS_EQ_CONFIG5 0x1ec + +#endif diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-com-v6.h b/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-com-v6.h 
index f420f8faf16a7aa5e47b60eadd4a8d2abc09c673..ec7291424dd1f1bb7f706bbb5b77419f76d0bfda 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-com-v6.h +++ b/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-com-v6.h @@ -22,6 +22,8 @@ #define QSERDES_V6_COM_DIV_FRAC_START2_MODE1 0x34 #define QSERDES_V6_COM_DIV_FRAC_START3_MODE1 0x38 #define QSERDES_V6_COM_HSCLK_SEL_1 0x3c +#define QSERDES_V6_COM_INTEGLOOP_GAIN0_MODE1 0x40 +#define QSERDES_V6_COM_INTEGLOOP_GAIN1_MODE1 0x44 #define QSERDES_V6_COM_VCO_TUNE1_MODE1 0x48 #define QSERDES_V6_COM_VCO_TUNE2_MODE1 0x4c #define QSERDES_V6_COM_BIN_VCOCAL_CMP_CODE1_MODE1 0x50 @@ -48,6 +50,7 @@ #define QSERDES_V6_COM_VCO_TUNE2_MODE0 0xac #define QSERDES_V6_COM_BG_TIMER 0xbc #define QSERDES_V6_COM_SSC_EN_CENTER 0xc0 +#define QSERDES_V6_COM_SSC_ADJ_PER1 0xc4 #define QSERDES_V6_COM_SSC_PER1 0xcc #define QSERDES_V6_COM_SSC_PER2 0xd0 #define QSERDES_V6_COM_PLL_POST_DIV_MUX 0xd8 @@ -56,6 +59,7 @@ #define QSERDES_V6_COM_SYS_CLK_CTRL 0xe4 #define QSERDES_V6_COM_SYSCLK_BUF_ENABLE 0xe8 #define QSERDES_V6_COM_PLL_IVCO 0xf4 +#define QSERDES_V6_COM_PLL_IVCO_MODE1 0xf8 #define QSERDES_V6_COM_SYSCLK_EN_SEL 0x110 #define QSERDES_V6_COM_RESETSM_CNTRL 0x118 #define QSERDES_V6_COM_LOCK_CMP_EN 0x120 @@ -63,6 +67,7 @@ #define QSERDES_V6_COM_VCO_TUNE_CTRL 0x13c #define QSERDES_V6_COM_VCO_TUNE_MAP 0x140 #define QSERDES_V6_COM_VCO_TUNE_INITVAL2 0x148 +#define QSERDES_V6_COM_VCO_TUNE_MAXVAL2 0x158 #define QSERDES_V6_COM_CLK_SELECT 0x164 #define QSERDES_V6_COM_CORE_CLK_EN 0x170 #define QSERDES_V6_COM_CMN_CONFIG_1 0x174 diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-com-v7.h b/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-com-v7.h new file mode 100644 index 0000000000000000000000000000000000000000..7430f49214779855df1cf40acac96440957f9e6d --- /dev/null +++ b/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-com-v7.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2023, Linaro Limited + */ + +#ifndef QCOM_PHY_QMP_QSERDES_COM_V7_H_ 
+#define QCOM_PHY_QMP_QSERDES_COM_V7_H_ + +/* Only for QMP V7 PHY - QSERDES COM registers */ + +#define QSERDES_V7_COM_SSC_STEP_SIZE1_MODE1 0x00 +#define QSERDES_V7_COM_SSC_STEP_SIZE2_MODE1 0x04 +#define QSERDES_V7_COM_CP_CTRL_MODE1 0x10 +#define QSERDES_V7_COM_PLL_RCTRL_MODE1 0x14 +#define QSERDES_V7_COM_PLL_CCTRL_MODE1 0x18 +#define QSERDES_V7_COM_CORECLK_DIV_MODE1 0x1c +#define QSERDES_V7_COM_LOCK_CMP1_MODE1 0x20 +#define QSERDES_V7_COM_LOCK_CMP2_MODE1 0x24 +#define QSERDES_V7_COM_DEC_START_MODE1 0x28 +#define QSERDES_V7_COM_DEC_START_MSB_MODE1 0x2c +#define QSERDES_V7_COM_DIV_FRAC_START1_MODE1 0x30 +#define QSERDES_V7_COM_DIV_FRAC_START2_MODE1 0x34 +#define QSERDES_V7_COM_DIV_FRAC_START3_MODE1 0x38 +#define QSERDES_V7_COM_HSCLK_SEL_1 0x3c +#define QSERDES_V7_COM_INTEGLOOP_GAIN0_MODE1 0x40 +#define QSERDES_V7_COM_INTEGLOOP_GAIN1_MODE1 0x44 +#define QSERDES_V7_COM_VCO_TUNE1_MODE1 0x48 +#define QSERDES_V7_COM_VCO_TUNE2_MODE1 0x4c +#define QSERDES_V7_COM_BIN_VCOCAL_CMP_CODE1_MODE1 0x50 +#define QSERDES_V7_COM_BIN_VCOCAL_CMP_CODE2_MODE1 0x54 +#define QSERDES_V7_COM_BIN_VCOCAL_CMP_CODE1_MODE0 0x58 +#define QSERDES_V7_COM_BIN_VCOCAL_CMP_CODE2_MODE0 0x5c +#define QSERDES_V7_COM_SSC_STEP_SIZE1_MODE0 0x60 +#define QSERDES_V7_COM_SSC_STEP_SIZE2_MODE0 0x64 +#define QSERDES_V7_COM_CP_CTRL_MODE0 0x70 +#define QSERDES_V7_COM_PLL_RCTRL_MODE0 0x74 +#define QSERDES_V7_COM_PLL_CCTRL_MODE0 0x78 +#define QSERDES_V7_COM_PLL_CORE_CLK_DIV_MODE0 0x7c +#define QSERDES_V7_COM_LOCK_CMP1_MODE0 0x80 +#define QSERDES_V7_COM_LOCK_CMP2_MODE0 0x84 +#define QSERDES_V7_COM_DEC_START_MODE0 0x88 +#define QSERDES_V7_COM_DEC_START_MSB_MODE0 0x8c +#define QSERDES_V7_COM_DIV_FRAC_START1_MODE0 0x90 +#define QSERDES_V7_COM_DIV_FRAC_START2_MODE0 0x94 +#define QSERDES_V7_COM_DIV_FRAC_START3_MODE0 0x98 +#define QSERDES_V7_COM_HSCLK_HS_SWITCH_SEL_1 0x9c +#define QSERDES_V7_COM_INTEGLOOP_GAIN0_MODE0 0xa0 +#define QSERDES_V7_COM_INTEGLOOP_GAIN1_MODE0 0xa4 +#define QSERDES_V7_COM_VCO_TUNE1_MODE0 0xa8 +#define 
QSERDES_V7_COM_VCO_TUNE2_MODE0 0xac +#define QSERDES_V7_COM_BG_TIMER 0xbc +#define QSERDES_V7_COM_SSC_EN_CENTER 0xc0 +#define QSERDES_V7_COM_SSC_ADJ_PER1 0xc4 +#define QSERDES_V7_COM_SSC_PER1 0xcc +#define QSERDES_V7_COM_SSC_PER2 0xd0 +#define QSERDES_V7_COM_PLL_POST_DIV_MUX 0xd8 +#define QSERDES_V7_COM_PLL_BIAS_EN_CLK_BUFLR_EN 0xdc +#define QSERDES_V7_COM_CLK_ENABLE1 0xe0 +#define QSERDES_V7_COM_SYS_CLK_CTRL 0xe4 +#define QSERDES_V7_COM_SYSCLK_BUF_ENABLE 0xe8 +#define QSERDES_V7_COM_PLL_IVCO 0xf4 +#define QSERDES_V7_COM_PLL_IVCO_MODE1 0xf8 +#define QSERDES_V7_COM_SYSCLK_EN_SEL 0x110 +#define QSERDES_V7_COM_RESETSM_CNTRL 0x118 +#define QSERDES_V7_COM_LOCK_CMP_EN 0x120 +#define QSERDES_V7_COM_LOCK_CMP_CFG 0x124 +#define QSERDES_V7_COM_VCO_TUNE_CTRL 0x13c +#define QSERDES_V7_COM_VCO_TUNE_MAP 0x140 +#define QSERDES_V7_COM_VCO_TUNE_INITVAL2 0x148 +#define QSERDES_V7_COM_VCO_TUNE_MAXVAL2 0x158 +#define QSERDES_V7_COM_CLK_SELECT 0x164 +#define QSERDES_V7_COM_CORE_CLK_EN 0x170 +#define QSERDES_V7_COM_CMN_CONFIG_1 0x174 +#define QSERDES_V7_COM_SVS_MODE_CLK_SEL 0x17c +#define QSERDES_V7_COM_CMN_MISC_1 0x184 +#define QSERDES_V7_COM_CMN_MODE 0x188 +#define QSERDES_V7_COM_PLL_VCO_DC_LEVEL_CTRL 0x198 +#define QSERDES_V7_COM_AUTO_GAIN_ADJ_CTRL_1 0x1a4 +#define QSERDES_V7_COM_AUTO_GAIN_ADJ_CTRL_2 0x1a8 +#define QSERDES_V7_COM_AUTO_GAIN_ADJ_CTRL_3 0x1ac +#define QSERDES_V7_COM_ADDITIONAL_MISC 0x1b4 +#define QSERDES_V7_COM_ADDITIONAL_MISC_2 0x1b8 +#define QSERDES_V7_COM_ADDITIONAL_MISC_3 0x1bc +#define QSERDES_V7_COM_CMN_STATUS 0x1d0 +#define QSERDES_V7_COM_C_READY_STATUS 0x1f8 + +#endif diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-txrx-ufs-v6.h b/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-txrx-ufs-v6.h index 15bcb4ba91399894b8c934b584d9ae76c083eafc..35d497fd9f9a4420e8c02ae8dd91de011c94c14a 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-txrx-ufs-v6.h +++ b/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-txrx-ufs-v6.h @@ -10,10 +10,18 @@ #define 
QSERDES_UFS_V6_TX_RES_CODE_LANE_RX 0x2c #define QSERDES_UFS_V6_TX_RES_CODE_LANE_OFFSET_TX 0x30 #define QSERDES_UFS_V6_TX_RES_CODE_LANE_OFFSET_RX 0x34 +#define QSERDES_UFS_V6_TX_LANE_MODE_1 0x7c +#define QSERDES_UFS_V6_TX_FR_DCC_CTRL 0x108 #define QSERDES_UFS_V6_RX_UCDR_FASTLOCK_FO_GAIN_RATE2 0x08 #define QSERDES_UFS_V6_RX_UCDR_FASTLOCK_FO_GAIN_RATE4 0x10 +#define QSERDES_UFS_V6_RX_UCDR_SO_SATURATION 0x28 +#define QSERDES_UFS_V6_RX_UCDR_PI_CTRL1 0x58 +#define QSERDES_UFS_V6_RX_RX_TERM_BW_CTRL0 0xc4 +#define QSERDES_UFS_V6_RX_UCDR_FO_GAIN_RATE2 0xd4 +#define QSERDES_UFS_V6_RX_UCDR_FO_GAIN_RATE4 0xdc #define QSERDES_UFS_V6_RX_VGA_CAL_MAN_VAL 0x178 +#define QSERDES_UFS_V6_RX_INTERFACE_MODE 0x1e0 #define QSERDES_UFS_V6_RX_MODE_RATE_0_1_B0 0x208 #define QSERDES_UFS_V6_RX_MODE_RATE_0_1_B1 0x20c #define QSERDES_UFS_V6_RX_MODE_RATE_0_1_B3 0x214 diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-txrx-v6.h b/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-txrx-v6.h index 8883e1de730eff83a8ca3cd7065e78fe93e4add8..23ffcfae9efab4a9e081414f9b3bbd0079d34f18 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-txrx-v6.h +++ b/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-txrx-v6.h @@ -23,6 +23,7 @@ #define QSERDES_V6_TX_PARRATE_REC_DETECT_IDLE_EN 0x60 #define QSERDES_V6_TX_BIST_PATTERN7 0x7c #define QSERDES_V6_TX_LANE_MODE_1 0x84 +#define QSERDES_V6_TX_LANE_MODE_2 0x88 #define QSERDES_V6_TX_LANE_MODE_3 0x8c #define QSERDES_V6_TX_LANE_MODE_4 0x90 #define QSERDES_V6_TX_LANE_MODE_5 0x94 diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-txrx-v6_20.h b/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-txrx-v6_20.h index 5385a8b60970748373f890a6f17c5a528ae421dc..6ed5339fd2ea86dd4a69df077887ea9a0713dcdd 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-txrx-v6_20.h +++ b/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-txrx-v6_20.h @@ -15,10 +15,13 @@ #define QSERDES_V6_20_RX_UCDR_FO_GAIN_RATE_2 0x08 #define QSERDES_V6_20_RX_UCDR_FO_GAIN_RATE_3 0x0c +#define QSERDES_V6_20_RX_UCDR_SO_GAIN_RATE_2 
0x18 #define QSERDES_V6_20_RX_UCDR_PI_CONTROLS 0x20 #define QSERDES_V6_20_RX_UCDR_SO_ACC_DEFAULT_VAL_RATE3 0x34 #define QSERDES_V6_20_RX_IVCM_CAL_CTRL2 0x9c #define QSERDES_V6_20_RX_IVCM_POSTCAL_OFFSET 0xa0 +#define QSERDES_V6_20_RX_DFE_1 0xac +#define QSERDES_V6_20_RX_DFE_2 0xb0 #define QSERDES_V6_20_RX_DFE_3 0xb4 #define QSERDES_V6_20_RX_VGA_CAL_MAN_VAL 0xe8 #define QSERDES_V6_20_RX_GM_CAL 0x10c @@ -41,5 +44,6 @@ #define QSERDES_V6_20_RX_MODE_RATE3_B4 0x220 #define QSERDES_V6_20_RX_MODE_RATE3_B5 0x224 #define QSERDES_V6_20_RX_MODE_RATE3_B6 0x228 +#define QSERDES_V6_20_RX_BKUP_CTRL1 0x22c #endif diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-txrx-v6_n4.h b/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-txrx-v6_n4.h new file mode 100644 index 0000000000000000000000000000000000000000..a814ad11af071b187c8c9b13054f506dccf1aa81 --- /dev/null +++ b/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-txrx-v6_n4.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2023, Linaro Limited + */ + +#ifndef QCOM_PHY_QMP_QSERDES_TXRX_V6_N4_H_ +#define QCOM_PHY_QMP_QSERDES_TXRX_V6_N4_H_ + +#define QSERDES_V6_N4_TX_RES_CODE_LANE_OFFSET_TX 0x30 +#define QSERDES_V6_N4_TX_RES_CODE_LANE_OFFSET_RX 0x34 +#define QSERDES_V6_N4_TX_LANE_MODE_1 0x78 +#define QSERDES_V6_N4_TX_LANE_MODE_2 0x7c +#define QSERDES_V6_N4_TX_LANE_MODE_3 0x80 + +#define QSERDES_V6_N4_RX_UCDR_FO_GAIN_RATE2 0x8 +#define QSERDES_V6_N4_RX_UCDR_SO_GAIN_RATE2 0x18 +#define QSERDES_V6_N4_RX_UCDR_PI_CONTROLS 0x20 +#define QSERDES_V6_N4_RX_IVCM_CAL_CODE_OVERRIDE 0x94 +#define QSERDES_V6_N4_RX_RX_IVCM_CAL_CTRL2 0x9c +#define QSERDES_V6_N4_RX_RX_IVCM_POSTCAL_OFFSET 0xa0 +#define QSERDES_V6_N4_RX_DFE_3 0xb4 +#define QSERDES_V6_N4_RX_VGA_CAL_CNTRL1 0xe0 +#define QSERDES_V6_N4_RX_VGA_CAL_MAN_VAL 0xe8 +#define QSERDES_V6_N4_RX_GM_CAL 0x10c +#define QSERDES_V6_N4_RX_SIGDET_ENABLES 0x148 +#define QSERDES_V6_N4_RX_SIGDET_CNTRL 0x14c +#define QSERDES_V6_N4_RX_SIGDET_DEGLITCH_CNTRL 0x154 +#define 
QSERDES_V6_N4_RX_DFE_CTLE_POST_CAL_OFFSET 0x194 +#define QSERDES_V6_N4_RX_Q_PI_INTRINSIC_BIAS_RATE32 0x1dc +#define QSERDES_V6_N4_RX_UCDR_PI_CTRL1 0x23c +#define QSERDES_V6_N4_RX_UCDR_PI_CTRL2 0x240 +#define QSERDES_V6_N4_RX_UCDR_SB2_GAIN2_RATE2 0x27c +#define QSERDES_V6_N4_RX_DFE_DAC_ENABLE1 0x298 +#define QSERDES_V6_N4_RX_MODE_RATE_0_1_B0 0x2b8 +#define QSERDES_V6_N4_RX_MODE_RATE_0_1_B1 0x2bc +#define QSERDES_V6_N4_RX_MODE_RATE_0_1_B2 0x2c0 +#define QSERDES_V6_N4_RX_MODE_RATE_0_1_B3 0x2c4 +#define QSERDES_V6_N4_RX_MODE_RATE_0_1_B4 0x2c8 +#define QSERDES_V6_N4_RX_MODE_RATE_0_1_B5 0x2cc +#define QSERDES_V6_N4_RX_MODE_RATE_0_1_B6 0x2d0 +#define QSERDES_V6_N4_RX_MODE_RATE2_B0 0x2d4 +#define QSERDES_V6_N4_RX_MODE_RATE2_B1 0x2d8 +#define QSERDES_V6_N4_RX_MODE_RATE2_B2 0x2dc +#define QSERDES_V6_N4_RX_MODE_RATE2_B3 0x2e0 +#define QSERDES_V6_N4_RX_MODE_RATE2_B4 0x2e4 +#define QSERDES_V6_N4_RX_MODE_RATE2_B5 0x2e8 +#define QSERDES_V6_N4_RX_MODE_RATE2_B6 0x2ec +#define QSERDES_V6_N4_RX_RX_SUMMER_CAL_SPD_MODE 0x30c +#define QSERDES_V6_N4_RX_RX_BKUP_CTRL1 0x310 + +#endif diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-txrx-v7.h b/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-txrx-v7.h new file mode 100644 index 0000000000000000000000000000000000000000..91f865b11347af82c38a33e08bcae7b67a7bec26 --- /dev/null +++ b/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-txrx-v7.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2023, Linaro Limited + */ + +#ifndef QCOM_PHY_QMP_QSERDES_TXRX_V7_H_ +#define QCOM_PHY_QMP_QSERDES_TXRX_V7_H_ + +#define QSERDES_V7_TX_CLKBUF_ENABLE 0x08 +#define QSERDES_V7_TX_RESET_TSYNC_EN 0x1c +#define QSERDES_V7_TX_PRE_STALL_LDO_BOOST_EN 0x20 +#define QSERDES_V7_TX_TX_BAND 0x24 +#define QSERDES_V7_TX_INTERFACE_SELECT 0x2c +#define QSERDES_V7_TX_RES_CODE_LANE_TX 0x34 +#define QSERDES_V7_TX_RES_CODE_LANE_RX 0x38 +#define QSERDES_V7_TX_RES_CODE_LANE_OFFSET_TX 0x3c +#define QSERDES_V7_TX_RES_CODE_LANE_OFFSET_RX 0x40 +#define 
QSERDES_V7_TX_PARRATE_REC_DETECT_IDLE_EN 0x60 +#define QSERDES_V7_TX_BIST_PATTERN7 0x7c +#define QSERDES_V7_TX_LANE_MODE_1 0x84 +#define QSERDES_V7_TX_LANE_MODE_2 0x88 +#define QSERDES_V7_TX_LANE_MODE_3 0x8c +#define QSERDES_V7_TX_LANE_MODE_4 0x90 +#define QSERDES_V7_TX_LANE_MODE_5 0x94 +#define QSERDES_V7_TX_RCV_DETECT_LVL_2 0xa4 +#define QSERDES_V7_TX_TRAN_DRVR_EMP_EN 0xc0 +#define QSERDES_V7_TX_TX_INTERFACE_MODE 0xc4 +#define QSERDES_V7_TX_VMODE_CTRL1 0xc8 +#define QSERDES_V7_TX_PI_QEC_CTRL 0xe4 + +#define QSERDES_V7_RX_UCDR_FO_GAIN 0x08 +#define QSERDES_V7_RX_UCDR_SO_GAIN 0x14 +#define QSERDES_V7_RX_UCDR_FASTLOCK_FO_GAIN 0x30 +#define QSERDES_V7_RX_UCDR_SO_SATURATION_AND_ENABLE 0x34 +#define QSERDES_V7_RX_UCDR_FASTLOCK_COUNT_LOW 0x3c +#define QSERDES_V7_RX_UCDR_FASTLOCK_COUNT_HIGH 0x40 +#define QSERDES_V7_RX_UCDR_PI_CONTROLS 0x44 +#define QSERDES_V7_RX_UCDR_SB2_THRESH1 0x4c +#define QSERDES_V7_RX_UCDR_SB2_THRESH2 0x50 +#define QSERDES_V7_RX_UCDR_SB2_GAIN1 0x54 +#define QSERDES_V7_RX_UCDR_SB2_GAIN2 0x58 +#define QSERDES_V7_RX_AUX_DATA_TCOARSE_TFINE 0x60 +#define QSERDES_V7_RX_TX_ADAPT_POST_THRESH 0xcc +#define QSERDES_V7_RX_VGA_CAL_CNTRL1 0xd4 +#define QSERDES_V7_RX_VGA_CAL_CNTRL2 0xd8 +#define QSERDES_V7_RX_GM_CAL 0xdc +#define QSERDES_V7_RX_RX_EQU_ADAPTOR_CNTRL2 0xec +#define QSERDES_V7_RX_RX_EQU_ADAPTOR_CNTRL3 0xf0 +#define QSERDES_V7_RX_RX_EQU_ADAPTOR_CNTRL4 0xf4 +#define QSERDES_V7_RX_RX_IDAC_TSETTLE_LOW 0xf8 +#define QSERDES_V7_RX_RX_IDAC_TSETTLE_HIGH 0xfc +#define QSERDES_V7_RX_RX_EQ_OFFSET_ADAPTOR_CNTRL1 0x110 +#define QSERDES_V7_RX_SIDGET_ENABLES 0x118 +#define QSERDES_V7_RX_SIGDET_CNTRL 0x11c +#define QSERDES_V7_RX_SIGDET_DEGLITCH_CNTRL 0x124 +#define QSERDES_V7_RX_RX_MODE_00_LOW 0x15c +#define QSERDES_V7_RX_RX_MODE_00_HIGH 0x160 +#define QSERDES_V7_RX_RX_MODE_00_HIGH2 0x164 +#define QSERDES_V7_RX_RX_MODE_00_HIGH3 0x168 +#define QSERDES_V7_RX_RX_MODE_00_HIGH4 0x16c +#define QSERDES_V7_RX_RX_MODE_01_LOW 0x170 +#define QSERDES_V7_RX_RX_MODE_01_HIGH 0x174 
+#define QSERDES_V7_RX_RX_MODE_01_HIGH2 0x178 +#define QSERDES_V7_RX_RX_MODE_01_HIGH3 0x17c +#define QSERDES_V7_RX_RX_MODE_01_HIGH4 0x180 +#define QSERDES_V7_RX_RX_MODE_10_LOW 0x184 +#define QSERDES_V7_RX_RX_MODE_10_HIGH 0x188 +#define QSERDES_V7_RX_RX_MODE_10_HIGH2 0x18c +#define QSERDES_V7_RX_RX_MODE_10_HIGH3 0x190 +#define QSERDES_V7_RX_RX_MODE_10_HIGH4 0x194 +#define QSERDES_V7_RX_DFE_EN_TIMER 0x1a0 +#define QSERDES_V7_RX_DFE_CTLE_POST_CAL_OFFSET 0x1a4 +#define QSERDES_V7_RX_DCC_CTRL1 0x1a8 +#define QSERDES_V7_RX_VTH_CODE 0x1b0 +#define QSERDES_V7_RX_SIGDET_CAL_CTRL1 0x1e4 +#define QSERDES_V7_RX_SIGDET_CAL_TRIM 0x1f8 + +#endif diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index 514fa14df63452dd5505b50697fde7953caa6592..3c2e6255e26f66d21fec72595c680e5c2cccc9c4 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -763,22 +763,26 @@ static const struct qmp_phy_init_tbl sm8550_ufsphy_serdes[] = { QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_CCTRL_MODE0, 0x14), QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP1_MODE0, 0x7f), QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP2_MODE0, 0x06), - QMP_PHY_INIT_CFG(QSERDES_V6_COM_DEC_START_MODE0, 0x4c), - QMP_PHY_INIT_CFG(QSERDES_V6_COM_CP_CTRL_MODE0, 0x0a), - QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_RCTRL_MODE0, 0x18), - QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_CCTRL_MODE0, 0x14), - QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP1_MODE0, 0x99), - QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP2_MODE0, 0x07), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DEC_START_MODE1, 0x4c), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CP_CTRL_MODE1, 0x0a), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_RCTRL_MODE1, 0x18), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_CCTRL_MODE1, 0x14), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP1_MODE1, 0x99), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP2_MODE1, 0x07), +}; + +static const struct qmp_phy_init_tbl sm8550_ufsphy_hs_b_serdes[] = { + QMP_PHY_INIT_CFG(QSERDES_V6_COM_VCO_TUNE_MAP, 0x44), 
}; static const struct qmp_phy_init_tbl sm8550_ufsphy_tx[] = { - QMP_PHY_INIT_CFG(QSERDES_V6_TX_LANE_MODE_1, 0x05), + QMP_PHY_INIT_CFG(QSERDES_UFS_V6_TX_LANE_MODE_1, 0x05), QMP_PHY_INIT_CFG(QSERDES_UFS_V6_TX_RES_CODE_LANE_OFFSET_TX, 0x07), + QMP_PHY_INIT_CFG(QSERDES_UFS_V6_TX_FR_DCC_CTRL, 0x4c), }; static const struct qmp_phy_init_tbl sm8550_ufsphy_rx[] = { - QMP_PHY_INIT_CFG(QSERDES_UFS_V6_RX_UCDR_FASTLOCK_FO_GAIN_RATE2, 0x0c), - QMP_PHY_INIT_CFG(QSERDES_UFS_V6_RX_UCDR_FASTLOCK_FO_GAIN_RATE4, 0x0f), + QMP_PHY_INIT_CFG(QSERDES_UFS_V6_RX_UCDR_FO_GAIN_RATE2, 0x0c), QMP_PHY_INIT_CFG(QSERDES_UFS_V6_RX_VGA_CAL_MAN_VAL, 0x0e), QMP_PHY_INIT_CFG(QSERDES_UFS_V6_RX_MODE_RATE_0_1_B0, 0xc2), @@ -801,6 +805,69 @@ static const struct qmp_phy_init_tbl sm8550_ufsphy_pcs[] = { QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_TX_MID_TERM_CTRL1, 0x43), QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_PLL_CNTL, 0x2b), QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_MULTI_LANE_CTRL1, 0x02), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_TX_HSGEAR_CAPABILITY, 0x04), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_RX_HSGEAR_CAPABILITY, 0x04), +}; + +static const struct qmp_phy_init_tbl sm8650_ufsphy_serdes[] = { + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SYSCLK_EN_SEL, 0xd9), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CMN_CONFIG_1, 0x16), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_HSCLK_SEL_1, 0x11), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_HSCLK_HS_SWITCH_SEL_1, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP_EN, 0x01), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_IVCO, 0x0f), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_VCO_TUNE_MAP, 0x44), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_VCO_TUNE_INITVAL2, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DEC_START_MODE0, 0x41), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CP_CTRL_MODE0, 0x0a), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_RCTRL_MODE0, 0x18), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_CCTRL_MODE0, 0x14), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP1_MODE0, 0x7f), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP2_MODE0, 0x06), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DEC_START_MODE1, 0x4c), + 
QMP_PHY_INIT_CFG(QSERDES_V6_COM_CP_CTRL_MODE1, 0x0a), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_RCTRL_MODE1, 0x18), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_CCTRL_MODE1, 0x14), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP1_MODE1, 0x99), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP2_MODE1, 0x07), +}; + +static const struct qmp_phy_init_tbl sm8650_ufsphy_tx[] = { + QMP_PHY_INIT_CFG(QSERDES_UFS_V6_TX_LANE_MODE_1, 0x05), + QMP_PHY_INIT_CFG(QSERDES_UFS_V6_TX_RES_CODE_LANE_OFFSET_TX, 0x07), +}; + +static const struct qmp_phy_init_tbl sm8650_ufsphy_rx[] = { + QMP_PHY_INIT_CFG(QSERDES_UFS_V6_RX_UCDR_FO_GAIN_RATE2, 0x0c), + QMP_PHY_INIT_CFG(QSERDES_UFS_V6_RX_UCDR_FO_GAIN_RATE4, 0x0f), + QMP_PHY_INIT_CFG(QSERDES_UFS_V6_RX_VGA_CAL_MAN_VAL, 0x0e), + QMP_PHY_INIT_CFG(QSERDES_UFS_V6_RX_MODE_RATE_0_1_B0, 0xc2), + QMP_PHY_INIT_CFG(QSERDES_UFS_V6_RX_MODE_RATE_0_1_B1, 0xc2), + QMP_PHY_INIT_CFG(QSERDES_UFS_V6_RX_MODE_RATE_0_1_B3, 0x1a), + QMP_PHY_INIT_CFG(QSERDES_UFS_V6_RX_MODE_RATE_0_1_B6, 0x60), + QMP_PHY_INIT_CFG(QSERDES_UFS_V6_RX_MODE_RATE2_B3, 0x9e), + QMP_PHY_INIT_CFG(QSERDES_UFS_V6_RX_MODE_RATE2_B6, 0x60), + QMP_PHY_INIT_CFG(QSERDES_UFS_V6_RX_MODE_RATE3_B3, 0x9e), + QMP_PHY_INIT_CFG(QSERDES_UFS_V6_RX_MODE_RATE3_B4, 0x0e), + QMP_PHY_INIT_CFG(QSERDES_UFS_V6_RX_MODE_RATE3_B5, 0x36), + QMP_PHY_INIT_CFG(QSERDES_UFS_V6_RX_MODE_RATE3_B8, 0x02), + QMP_PHY_INIT_CFG(QSERDES_UFS_V6_RX_MODE_RATE4_B3, 0xb9), + QMP_PHY_INIT_CFG(QSERDES_UFS_V6_RX_MODE_RATE4_B6, 0xff), + QMP_PHY_INIT_CFG(QSERDES_UFS_V6_RX_UCDR_SO_SATURATION, 0x1f), + QMP_PHY_INIT_CFG(QSERDES_UFS_V6_RX_UCDR_PI_CTRL1, 0x94), + QMP_PHY_INIT_CFG(QSERDES_UFS_V6_RX_RX_TERM_BW_CTRL0, 0xfa), +}; + +static const struct qmp_phy_init_tbl sm8650_ufsphy_pcs[] = { + QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_MULTI_LANE_CTRL1, 0x00), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_TX_MID_TERM_CTRL1, 0x43), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_PCS_CTRL1, 0xc1), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_PLL_CNTL, 0x33), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_TX_HSGEAR_CAPABILITY, 0x04), + 
QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_RX_HSGEAR_CAPABILITY, 0x04), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_TX_LARGE_AMP_DRV_LVL, 0x0f), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_RX_SIGDET_CTRL2, 0x69), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_MULTI_LANE_CTRL1, 0x02), }; struct qmp_ufs_offsets { @@ -1296,6 +1363,32 @@ static const struct qmp_phy_cfg sm8550_ufsphy_cfg = { .pcs = sm8550_ufsphy_pcs, .pcs_num = ARRAY_SIZE(sm8550_ufsphy_pcs), }, + .tbls_hs_b = { + .serdes = sm8550_ufsphy_hs_b_serdes, + .serdes_num = ARRAY_SIZE(sm8550_ufsphy_hs_b_serdes), + }, + .clk_list = sdm845_ufs_phy_clk_l, + .num_clks = ARRAY_SIZE(sdm845_ufs_phy_clk_l), + .vreg_list = qmp_phy_vreg_l, + .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), + .regs = ufsphy_v6_regs_layout, +}; + +static const struct qmp_phy_cfg sm8650_ufsphy_cfg = { + .lanes = 2, + + .offsets = &qmp_ufs_offsets_v6, + + .tbls = { + .serdes = sm8650_ufsphy_serdes, + .serdes_num = ARRAY_SIZE(sm8650_ufsphy_serdes), + .tx = sm8650_ufsphy_tx, + .tx_num = ARRAY_SIZE(sm8650_ufsphy_tx), + .rx = sm8650_ufsphy_rx, + .rx_num = ARRAY_SIZE(sm8650_ufsphy_rx), + .pcs = sm8650_ufsphy_pcs, + .pcs_num = ARRAY_SIZE(sm8650_ufsphy_pcs), + }, .clk_list = sdm845_ufs_phy_clk_l, .num_clks = ARRAY_SIZE(sdm845_ufs_phy_clk_l), .vreg_list = qmp_phy_vreg_l, @@ -1826,6 +1919,9 @@ static const struct of_device_id qmp_ufs_of_match_table[] = { }, { .compatible = "qcom,sm8550-qmp-ufs-phy", .data = &sm8550_ufsphy_cfg, + }, { + .compatible = "qcom,sm8650-qmp-ufs-phy", + .data = &sm8650_ufsphy_cfg, }, { }, }; diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index 02f156298e77ce2761c1d14e70e13873fc6c420d..243cc2b9a0fb6d1fadc7384a9e93f453efad6351 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -24,6 +24,8 @@ #include "phy-qcom-qmp-pcs-misc-v4.h" #include "phy-qcom-qmp-pcs-usb-v4.h" #include "phy-qcom-qmp-pcs-usb-v5.h" +#include "phy-qcom-qmp-pcs-usb-v6.h" +#include "phy-qcom-qmp-pcs-usb-v7.h" /* 
QPHY_SW_RESET bit */ #define SW_RESET BIT(0) @@ -151,6 +153,28 @@ static const unsigned int qmp_v5_usb3phy_regs_layout[QPHY_LAYOUT_SIZE] = { [QPHY_PCS_LFPS_RXTERM_IRQ_CLEAR] = QPHY_V5_PCS_USB3_LFPS_RXTERM_IRQ_CLEAR, }; +static const unsigned int qmp_v6_usb3phy_regs_layout[QPHY_LAYOUT_SIZE] = { + [QPHY_SW_RESET] = QPHY_V6_PCS_SW_RESET, + [QPHY_START_CTRL] = QPHY_V6_PCS_START_CONTROL, + [QPHY_PCS_STATUS] = QPHY_V6_PCS_PCS_STATUS1, + [QPHY_PCS_POWER_DOWN_CONTROL] = QPHY_V6_PCS_POWER_DOWN_CONTROL, + + /* In PCS_USB */ + [QPHY_PCS_AUTONOMOUS_MODE_CTRL] = QPHY_V6_PCS_USB3_AUTONOMOUS_MODE_CTRL, + [QPHY_PCS_LFPS_RXTERM_IRQ_CLEAR] = QPHY_V6_PCS_USB3_LFPS_RXTERM_IRQ_CLEAR, +}; + +static const unsigned int qmp_v7_usb3phy_regs_layout[QPHY_LAYOUT_SIZE] = { + [QPHY_SW_RESET] = QPHY_V7_PCS_SW_RESET, + [QPHY_START_CTRL] = QPHY_V7_PCS_START_CONTROL, + [QPHY_PCS_STATUS] = QPHY_V7_PCS_PCS_STATUS1, + [QPHY_PCS_POWER_DOWN_CONTROL] = QPHY_V7_PCS_POWER_DOWN_CONTROL, + + /* In PCS_USB */ + [QPHY_PCS_AUTONOMOUS_MODE_CTRL] = QPHY_V7_PCS_USB3_AUTONOMOUS_MODE_CTRL, + [QPHY_PCS_LFPS_RXTERM_IRQ_CLEAR] = QPHY_V7_PCS_USB3_LFPS_RXTERM_IRQ_CLEAR, +}; + static const struct qmp_phy_init_tbl ipq9574_usb3_serdes_tbl[] = { QMP_PHY_INIT_CFG(QSERDES_COM_SYSCLK_EN_SEL, 0x1a), QMP_PHY_INIT_CFG(QSERDES_COM_BIAS_EN_CLKBUFLR_EN, 0x08), @@ -871,6 +895,134 @@ static const struct qmp_phy_init_tbl sdx65_usb3_uniphy_rx_tbl[] = { QMP_PHY_INIT_CFG(QSERDES_V5_RX_SIGDET_ENABLES, 0x00), }; +static const struct qmp_phy_init_tbl sdx75_usb3_uniphy_serdes_tbl[] = { + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_STEP_SIZE1_MODE1, 0x9e), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_STEP_SIZE2_MODE1, 0x06), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CP_CTRL_MODE1, 0x02), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_RCTRL_MODE1, 0x16), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_CCTRL_MODE1, 0x36), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CORECLK_DIV_MODE1, 0x04), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP1_MODE1, 0x2e), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP2_MODE1, 
0x82), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DEC_START_MODE1, 0x82), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START1_MODE1, 0xab), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START2_MODE1, 0xea), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START3_MODE1, 0x02), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_HSCLK_SEL_1, 0x01), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_VCO_TUNE1_MODE1, 0x25), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_VCO_TUNE2_MODE1, 0x02), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_BIN_VCOCAL_CMP_CODE1_MODE1, 0xb7), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_BIN_VCOCAL_CMP_CODE2_MODE1, 0x1e), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_BIN_VCOCAL_CMP_CODE1_MODE0, 0xb7), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_BIN_VCOCAL_CMP_CODE2_MODE0, 0x1e), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_STEP_SIZE1_MODE0, 0x9e), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_STEP_SIZE2_MODE0, 0x06), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CP_CTRL_MODE0, 0x02), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_RCTRL_MODE0, 0x16), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_CCTRL_MODE0, 0x36), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP1_MODE0, 0x12), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP2_MODE0, 0x34), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DEC_START_MODE0, 0x82), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START1_MODE0, 0xab), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START2_MODE0, 0xea), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START3_MODE0, 0x02), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_VCO_TUNE1_MODE0, 0x25), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_VCO_TUNE2_MODE0, 0x02), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_BG_TIMER, 0x0e), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_EN_CENTER, 0x01), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_PER1, 0x31), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_PER2, 0x01), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SYSCLK_BUF_ENABLE, 0x0a), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SYSCLK_EN_SEL, 0x1a), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP_CFG, 0x14), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_VCO_TUNE_MAP, 0x04), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CORE_CLK_EN, 0x20), + 
QMP_PHY_INIT_CFG(QSERDES_V6_COM_CMN_CONFIG_1, 0x16), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_AUTO_GAIN_ADJ_CTRL_1, 0xb6), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_AUTO_GAIN_ADJ_CTRL_2, 0x4b), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_AUTO_GAIN_ADJ_CTRL_3, 0x37), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_ADDITIONAL_MISC, 0x0c), +}; + +static const struct qmp_phy_init_tbl sdx75_usb3_uniphy_tx_tbl[] = { + QMP_PHY_INIT_CFG(QSERDES_V6_TX_RES_CODE_LANE_TX, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V6_TX_RES_CODE_LANE_RX, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V6_TX_RES_CODE_LANE_OFFSET_TX, 0x1f), + QMP_PHY_INIT_CFG(QSERDES_V6_TX_RES_CODE_LANE_OFFSET_RX, 0x09), + QMP_PHY_INIT_CFG(QSERDES_V6_TX_LANE_MODE_1, 0xf5), + QMP_PHY_INIT_CFG(QSERDES_V6_TX_LANE_MODE_3, 0x3f), + QMP_PHY_INIT_CFG(QSERDES_V6_TX_LANE_MODE_4, 0x3f), + QMP_PHY_INIT_CFG(QSERDES_V6_TX_LANE_MODE_5, 0x5f), + QMP_PHY_INIT_CFG(QSERDES_V6_TX_RCV_DETECT_LVL_2, 0x12), + QMP_PHY_INIT_CFG(QSERDES_V6_TX_PI_QEC_CTRL, 0x21), +}; + +static const struct qmp_phy_init_tbl sdx75_usb3_uniphy_rx_tbl[] = { + QMP_PHY_INIT_CFG(QSERDES_V6_RX_UCDR_FO_GAIN, 0x0a), + QMP_PHY_INIT_CFG(QSERDES_V6_RX_UCDR_SO_GAIN, 0x06), + QMP_PHY_INIT_CFG(QSERDES_V6_RX_UCDR_FASTLOCK_FO_GAIN, 0x2f), + QMP_PHY_INIT_CFG(QSERDES_V6_RX_UCDR_SO_SATURATION_AND_ENABLE, 0x7f), + QMP_PHY_INIT_CFG(QSERDES_V6_RX_UCDR_FASTLOCK_COUNT_LOW, 0xff), + QMP_PHY_INIT_CFG(QSERDES_V6_RX_UCDR_FASTLOCK_COUNT_HIGH, 0x0f), + QMP_PHY_INIT_CFG(QSERDES_V6_RX_UCDR_PI_CONTROLS, 0x99), + QMP_PHY_INIT_CFG(QSERDES_V6_RX_UCDR_SB2_THRESH1, 0x08), + QMP_PHY_INIT_CFG(QSERDES_V6_RX_UCDR_SB2_THRESH2, 0x08), + QMP_PHY_INIT_CFG(QSERDES_V6_RX_UCDR_SB2_GAIN1, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V6_RX_UCDR_SB2_GAIN2, 0x0a), + QMP_PHY_INIT_CFG(QSERDES_V6_RX_AUX_DATA_TCOARSE_TFINE, 0xa0), + QMP_PHY_INIT_CFG(QSERDES_V6_RX_VGA_CAL_CNTRL1, 0x54), + QMP_PHY_INIT_CFG(QSERDES_V6_RX_VGA_CAL_CNTRL2, 0x0f), + QMP_PHY_INIT_CFG(QSERDES_V6_RX_GM_CAL, 0x13), + QMP_PHY_INIT_CFG(QSERDES_V6_RX_RX_EQU_ADAPTOR_CNTRL2, 0x0f), + 
QMP_PHY_INIT_CFG(QSERDES_V6_RX_RX_EQU_ADAPTOR_CNTRL3, 0x4a), + QMP_PHY_INIT_CFG(QSERDES_V6_RX_RX_EQU_ADAPTOR_CNTRL4, 0x0a), + QMP_PHY_INIT_CFG(QSERDES_V6_RX_RX_IDAC_TSETTLE_LOW, 0x07), + QMP_PHY_INIT_CFG(QSERDES_V6_RX_RX_IDAC_TSETTLE_HIGH, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V6_RX_RX_EQ_OFFSET_ADAPTOR_CNTRL1, 0x47), + QMP_PHY_INIT_CFG(QSERDES_V6_RX_SIGDET_CNTRL, 0x04), + QMP_PHY_INIT_CFG(QSERDES_V6_RX_SIGDET_DEGLITCH_CNTRL, 0x0e), + QMP_PHY_INIT_CFG(QSERDES_V6_RX_RX_MODE_00_LOW, 0x3f), + QMP_PHY_INIT_CFG(QSERDES_V6_RX_RX_MODE_00_HIGH, 0xbf), + QMP_PHY_INIT_CFG(QSERDES_V6_RX_RX_MODE_00_HIGH2, 0xff), + QMP_PHY_INIT_CFG(QSERDES_V6_RX_RX_MODE_00_HIGH3, 0xdf), + QMP_PHY_INIT_CFG(QSERDES_V6_RX_RX_MODE_00_HIGH4, 0xed), + QMP_PHY_INIT_CFG(QSERDES_V6_RX_RX_MODE_01_LOW, 0xdc), + QMP_PHY_INIT_CFG(QSERDES_V6_RX_RX_MODE_01_HIGH, 0x5c), + QMP_PHY_INIT_CFG(QSERDES_V6_RX_RX_MODE_01_HIGH2, 0x9c), + QMP_PHY_INIT_CFG(QSERDES_V6_RX_RX_MODE_01_HIGH3, 0x1d), + QMP_PHY_INIT_CFG(QSERDES_V6_RX_RX_MODE_01_HIGH4, 0x09), + QMP_PHY_INIT_CFG(QSERDES_V6_RX_DFE_EN_TIMER, 0x04), + QMP_PHY_INIT_CFG(QSERDES_V6_RX_DFE_CTLE_POST_CAL_OFFSET, 0x38), + QMP_PHY_INIT_CFG(QSERDES_V6_RX_DCC_CTRL1, 0x0c), + QMP_PHY_INIT_CFG(QSERDES_V6_RX_VTH_CODE, 0x10), + QMP_PHY_INIT_CFG(QSERDES_V6_RX_SIGDET_CAL_CTRL1, 0x14), + QMP_PHY_INIT_CFG(QSERDES_V6_RX_SIGDET_CAL_TRIM, 0x08), +}; + +static const struct qmp_phy_init_tbl sdx75_usb3_uniphy_pcs_tbl[] = { + QMP_PHY_INIT_CFG(QPHY_V6_PCS_LOCK_DETECT_CONFIG1, 0xc4), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_LOCK_DETECT_CONFIG2, 0x89), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_LOCK_DETECT_CONFIG3, 0x20), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_LOCK_DETECT_CONFIG6, 0x13), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_REFGEN_REQ_CONFIG1, 0x21), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_RX_SIGDET_LVL, 0xaa), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_RCVR_DTCT_DLY_P1U2_L, 0xe7), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_RCVR_DTCT_DLY_P1U2_H, 0x03), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_CDR_RESET_TIME, 0x0a), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_ALIGN_DETECT_CONFIG1, 0x88), 
+ QMP_PHY_INIT_CFG(QPHY_V6_PCS_ALIGN_DETECT_CONFIG2, 0x13), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_PCS_TX_RX_CONFIG, 0x0c), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_EQ_CONFIG1, 0x4b), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_EQ_CONFIG5, 0x10), +}; + +static const struct qmp_phy_init_tbl sdx75_usb3_uniphy_pcs_usb_tbl[] = { + QMP_PHY_INIT_CFG(QPHY_V6_PCS_USB3_LFPS_DET_HIGH_COUNT_VAL, 0xf8), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_USB3_RXEQTRAINING_DFE_TIME_S2, 0x07), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_USB3_RCVR_DTCT_DLY_U3_L, 0x40), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_USB3_RCVR_DTCT_DLY_U3_H, 0x00), +}; + static const struct qmp_phy_init_tbl sm8350_usb3_uniphy_tx_tbl[] = { QMP_PHY_INIT_CFG(QSERDES_V5_TX_LANE_MODE_1, 0xa5), QMP_PHY_INIT_CFG(QSERDES_V5_TX_LANE_MODE_2, 0x82), @@ -1161,6 +1313,134 @@ static const struct qmp_phy_init_tbl sa8775p_usb3_uniphy_pcs_usb_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V5_PCS_USB3_POWER_STATE_CONFIG1, 0x6f), }; +static const struct qmp_phy_init_tbl x1e80100_usb3_uniphy_serdes_tbl[] = { + QMP_PHY_INIT_CFG(QSERDES_V7_COM_SSC_STEP_SIZE1_MODE1, 0xc0), + QMP_PHY_INIT_CFG(QSERDES_V7_COM_SSC_STEP_SIZE2_MODE1, 0x01), + QMP_PHY_INIT_CFG(QSERDES_V7_COM_CP_CTRL_MODE1, 0x02), + QMP_PHY_INIT_CFG(QSERDES_V7_COM_PLL_RCTRL_MODE1, 0x16), + QMP_PHY_INIT_CFG(QSERDES_V7_COM_PLL_CCTRL_MODE1, 0x36), + QMP_PHY_INIT_CFG(QSERDES_V7_COM_CORECLK_DIV_MODE1, 0x04), + QMP_PHY_INIT_CFG(QSERDES_V7_COM_LOCK_CMP1_MODE1, 0x16), + QMP_PHY_INIT_CFG(QSERDES_V7_COM_LOCK_CMP2_MODE1, 0x41), + QMP_PHY_INIT_CFG(QSERDES_V7_COM_DEC_START_MODE1, 0x41), + QMP_PHY_INIT_CFG(QSERDES_V7_COM_DIV_FRAC_START1_MODE1, 0x55), + QMP_PHY_INIT_CFG(QSERDES_V7_COM_DIV_FRAC_START2_MODE1, 0x75), + QMP_PHY_INIT_CFG(QSERDES_V7_COM_DIV_FRAC_START3_MODE1, 0x01), + QMP_PHY_INIT_CFG(QSERDES_V7_COM_HSCLK_SEL_1, 0x01), + QMP_PHY_INIT_CFG(QSERDES_V7_COM_VCO_TUNE1_MODE1, 0x25), + QMP_PHY_INIT_CFG(QSERDES_V7_COM_VCO_TUNE2_MODE1, 0x02), + QMP_PHY_INIT_CFG(QSERDES_V7_COM_BIN_VCOCAL_CMP_CODE1_MODE1, 0x5c), + 
QMP_PHY_INIT_CFG(QSERDES_V7_COM_BIN_VCOCAL_CMP_CODE2_MODE1, 0x0f), + QMP_PHY_INIT_CFG(QSERDES_V7_COM_BIN_VCOCAL_CMP_CODE1_MODE0, 0x5c), + QMP_PHY_INIT_CFG(QSERDES_V7_COM_BIN_VCOCAL_CMP_CODE2_MODE0, 0x0f), + QMP_PHY_INIT_CFG(QSERDES_V7_COM_SSC_STEP_SIZE1_MODE0, 0xc0), + QMP_PHY_INIT_CFG(QSERDES_V7_COM_SSC_STEP_SIZE2_MODE0, 0x01), + QMP_PHY_INIT_CFG(QSERDES_V7_COM_CP_CTRL_MODE0, 0x02), + QMP_PHY_INIT_CFG(QSERDES_V7_COM_PLL_RCTRL_MODE0, 0x16), + QMP_PHY_INIT_CFG(QSERDES_V7_COM_PLL_CCTRL_MODE0, 0x36), + QMP_PHY_INIT_CFG(QSERDES_V7_COM_LOCK_CMP1_MODE0, 0x08), + QMP_PHY_INIT_CFG(QSERDES_V7_COM_LOCK_CMP2_MODE0, 0x1a), + QMP_PHY_INIT_CFG(QSERDES_V7_COM_DEC_START_MODE0, 0x41), + QMP_PHY_INIT_CFG(QSERDES_V7_COM_DIV_FRAC_START1_MODE0, 0x55), + QMP_PHY_INIT_CFG(QSERDES_V7_COM_DIV_FRAC_START2_MODE0, 0x75), + QMP_PHY_INIT_CFG(QSERDES_V7_COM_DIV_FRAC_START3_MODE0, 0x01), + QMP_PHY_INIT_CFG(QSERDES_V7_COM_VCO_TUNE1_MODE0, 0x25), + QMP_PHY_INIT_CFG(QSERDES_V7_COM_VCO_TUNE2_MODE0, 0x02), + QMP_PHY_INIT_CFG(QSERDES_V7_COM_BG_TIMER, 0x0a), + QMP_PHY_INIT_CFG(QSERDES_V7_COM_SSC_EN_CENTER, 0x01), + QMP_PHY_INIT_CFG(QSERDES_V7_COM_SSC_PER1, 0x62), + QMP_PHY_INIT_CFG(QSERDES_V7_COM_SSC_PER2, 0x02), + QMP_PHY_INIT_CFG(QSERDES_V7_COM_SYSCLK_BUF_ENABLE, 0x0a), + QMP_PHY_INIT_CFG(QSERDES_V7_COM_SYSCLK_EN_SEL, 0x1a), + QMP_PHY_INIT_CFG(QSERDES_V7_COM_LOCK_CMP_CFG, 0x14), + QMP_PHY_INIT_CFG(QSERDES_V7_COM_VCO_TUNE_MAP, 0x04), + QMP_PHY_INIT_CFG(QSERDES_V7_COM_CORE_CLK_EN, 0x20), + QMP_PHY_INIT_CFG(QSERDES_V7_COM_CMN_CONFIG_1, 0x16), + QMP_PHY_INIT_CFG(QSERDES_V7_COM_AUTO_GAIN_ADJ_CTRL_1, 0xb6), + QMP_PHY_INIT_CFG(QSERDES_V7_COM_AUTO_GAIN_ADJ_CTRL_2, 0x4b), + QMP_PHY_INIT_CFG(QSERDES_V7_COM_AUTO_GAIN_ADJ_CTRL_3, 0x37), + QMP_PHY_INIT_CFG(QSERDES_V7_COM_ADDITIONAL_MISC, 0x0c), +}; + +static const struct qmp_phy_init_tbl x1e80100_usb3_uniphy_tx_tbl[] = { + QMP_PHY_INIT_CFG(QSERDES_V7_TX_RES_CODE_LANE_TX, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V7_TX_RES_CODE_LANE_RX, 0x00), + 
QMP_PHY_INIT_CFG(QSERDES_V7_TX_RES_CODE_LANE_OFFSET_TX, 0x1f), + QMP_PHY_INIT_CFG(QSERDES_V7_TX_RES_CODE_LANE_OFFSET_RX, 0x09), + QMP_PHY_INIT_CFG(QSERDES_V7_TX_LANE_MODE_1, 0xf5), + QMP_PHY_INIT_CFG(QSERDES_V7_TX_LANE_MODE_3, 0x3f), + QMP_PHY_INIT_CFG(QSERDES_V7_TX_LANE_MODE_4, 0x3f), + QMP_PHY_INIT_CFG(QSERDES_V7_TX_LANE_MODE_5, 0x5f), + QMP_PHY_INIT_CFG(QSERDES_V7_TX_RCV_DETECT_LVL_2, 0x12), + QMP_PHY_INIT_CFG(QSERDES_V7_TX_PI_QEC_CTRL, 0x21), +}; + +static const struct qmp_phy_init_tbl x1e80100_usb3_uniphy_rx_tbl[] = { + QMP_PHY_INIT_CFG(QSERDES_V7_RX_UCDR_FO_GAIN, 0x0a), + QMP_PHY_INIT_CFG(QSERDES_V7_RX_UCDR_SO_GAIN, 0x06), + QMP_PHY_INIT_CFG(QSERDES_V7_RX_UCDR_FASTLOCK_FO_GAIN, 0x2f), + QMP_PHY_INIT_CFG(QSERDES_V7_RX_UCDR_SO_SATURATION_AND_ENABLE, 0x7f), + QMP_PHY_INIT_CFG(QSERDES_V7_RX_UCDR_FASTLOCK_COUNT_LOW, 0xff), + QMP_PHY_INIT_CFG(QSERDES_V7_RX_UCDR_FASTLOCK_COUNT_HIGH, 0x0f), + QMP_PHY_INIT_CFG(QSERDES_V7_RX_UCDR_PI_CONTROLS, 0x99), + QMP_PHY_INIT_CFG(QSERDES_V7_RX_UCDR_SB2_THRESH1, 0x08), + QMP_PHY_INIT_CFG(QSERDES_V7_RX_UCDR_SB2_THRESH2, 0x08), + QMP_PHY_INIT_CFG(QSERDES_V7_RX_UCDR_SB2_GAIN1, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V7_RX_UCDR_SB2_GAIN2, 0x0a), + QMP_PHY_INIT_CFG(QSERDES_V7_RX_AUX_DATA_TCOARSE_TFINE, 0xa0), + QMP_PHY_INIT_CFG(QSERDES_V7_RX_VGA_CAL_CNTRL1, 0x54), + QMP_PHY_INIT_CFG(QSERDES_V7_RX_VGA_CAL_CNTRL2, 0x0f), + QMP_PHY_INIT_CFG(QSERDES_V7_RX_GM_CAL, 0x13), + QMP_PHY_INIT_CFG(QSERDES_V7_RX_RX_EQU_ADAPTOR_CNTRL2, 0x0f), + QMP_PHY_INIT_CFG(QSERDES_V7_RX_RX_EQU_ADAPTOR_CNTRL3, 0x4a), + QMP_PHY_INIT_CFG(QSERDES_V7_RX_RX_EQU_ADAPTOR_CNTRL4, 0x0a), + QMP_PHY_INIT_CFG(QSERDES_V7_RX_RX_IDAC_TSETTLE_LOW, 0x07), + QMP_PHY_INIT_CFG(QSERDES_V7_RX_RX_IDAC_TSETTLE_HIGH, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V7_RX_RX_EQ_OFFSET_ADAPTOR_CNTRL1, 0x47), + QMP_PHY_INIT_CFG(QSERDES_V7_RX_SIGDET_CNTRL, 0x04), + QMP_PHY_INIT_CFG(QSERDES_V7_RX_SIGDET_DEGLITCH_CNTRL, 0x0e), + QMP_PHY_INIT_CFG(QSERDES_V7_RX_RX_MODE_00_LOW, 0x3f), + 
QMP_PHY_INIT_CFG(QSERDES_V7_RX_RX_MODE_00_HIGH, 0xbf), + QMP_PHY_INIT_CFG(QSERDES_V7_RX_RX_MODE_00_HIGH2, 0xff), + QMP_PHY_INIT_CFG(QSERDES_V7_RX_RX_MODE_00_HIGH3, 0xdf), + QMP_PHY_INIT_CFG(QSERDES_V7_RX_RX_MODE_00_HIGH4, 0xed), + QMP_PHY_INIT_CFG(QSERDES_V7_RX_RX_MODE_01_LOW, 0xdc), + QMP_PHY_INIT_CFG(QSERDES_V7_RX_RX_MODE_01_HIGH, 0x5c), + QMP_PHY_INIT_CFG(QSERDES_V7_RX_RX_MODE_01_HIGH2, 0x9c), + QMP_PHY_INIT_CFG(QSERDES_V7_RX_RX_MODE_01_HIGH3, 0x1d), + QMP_PHY_INIT_CFG(QSERDES_V7_RX_RX_MODE_01_HIGH4, 0x09), + QMP_PHY_INIT_CFG(QSERDES_V7_RX_DFE_EN_TIMER, 0x04), + QMP_PHY_INIT_CFG(QSERDES_V7_RX_DFE_CTLE_POST_CAL_OFFSET, 0x38), + QMP_PHY_INIT_CFG(QSERDES_V7_RX_DCC_CTRL1, 0x0c), + QMP_PHY_INIT_CFG(QSERDES_V7_RX_VTH_CODE, 0x10), + QMP_PHY_INIT_CFG(QSERDES_V7_RX_SIGDET_CAL_CTRL1, 0x14), + QMP_PHY_INIT_CFG(QSERDES_V7_RX_SIGDET_CAL_TRIM, 0x08), +}; + +static const struct qmp_phy_init_tbl x1e80100_usb3_uniphy_pcs_tbl[] = { + QMP_PHY_INIT_CFG(QPHY_V7_PCS_LOCK_DETECT_CONFIG1, 0xc4), + QMP_PHY_INIT_CFG(QPHY_V7_PCS_LOCK_DETECT_CONFIG2, 0x89), + QMP_PHY_INIT_CFG(QPHY_V7_PCS_LOCK_DETECT_CONFIG3, 0x20), + QMP_PHY_INIT_CFG(QPHY_V7_PCS_LOCK_DETECT_CONFIG6, 0x13), + QMP_PHY_INIT_CFG(QPHY_V7_PCS_REFGEN_REQ_CONFIG1, 0x21), + QMP_PHY_INIT_CFG(QPHY_V7_PCS_RX_SIGDET_LVL, 0xaa), + QMP_PHY_INIT_CFG(QPHY_V7_PCS_RCVR_DTCT_DLY_P1U2_L, 0xe7), + QMP_PHY_INIT_CFG(QPHY_V7_PCS_RCVR_DTCT_DLY_P1U2_H, 0x03), + QMP_PHY_INIT_CFG(QPHY_V7_PCS_CDR_RESET_TIME, 0x0a), + QMP_PHY_INIT_CFG(QPHY_V7_PCS_ALIGN_DETECT_CONFIG1, 0x88), + QMP_PHY_INIT_CFG(QPHY_V7_PCS_ALIGN_DETECT_CONFIG2, 0x13), + QMP_PHY_INIT_CFG(QPHY_V7_PCS_PCS_TX_RX_CONFIG, 0x0c), + QMP_PHY_INIT_CFG(QPHY_V7_PCS_EQ_CONFIG1, 0x4b), + QMP_PHY_INIT_CFG(QPHY_V7_PCS_EQ_CONFIG5, 0x10), +}; + +static const struct qmp_phy_init_tbl x1e80100_usb3_uniphy_pcs_usb_tbl[] = { + QMP_PHY_INIT_CFG(QPHY_V7_PCS_USB3_LFPS_DET_HIGH_COUNT_VAL, 0xf8), + QMP_PHY_INIT_CFG(QPHY_V7_PCS_USB3_RXEQTRAINING_DFE_TIME_S2, 0x07), + 
QMP_PHY_INIT_CFG(QPHY_V7_PCS_USB3_RCVR_DTCT_DLY_U3_L, 0x40), + QMP_PHY_INIT_CFG(QPHY_V7_PCS_USB3_RCVR_DTCT_DLY_U3_H, 0x00), +}; + struct qmp_usb_offsets { u16 serdes; u16 pcs; @@ -1317,6 +1597,22 @@ static const struct qmp_usb_offsets qmp_usb_offsets_v5 = { .rx = 0x1000, }; +static const struct qmp_usb_offsets qmp_usb_offsets_v6 = { + .serdes = 0, + .pcs = 0x0200, + .pcs_usb = 0x1200, + .tx = 0x0e00, + .rx = 0x1000, +}; + +static const struct qmp_usb_offsets qmp_usb_offsets_v7 = { + .serdes = 0, + .pcs = 0x0200, + .pcs_usb = 0x1200, + .tx = 0x0e00, + .rx = 0x1000, +}; + static const struct qmp_phy_cfg ipq8074_usb3phy_cfg = { .lanes = 1, @@ -1541,6 +1837,28 @@ static const struct qmp_phy_cfg sdx65_usb3_uniphy_cfg = { .has_pwrdn_delay = true, }; +static const struct qmp_phy_cfg sdx75_usb3_uniphy_cfg = { + .lanes = 1, + .offsets = &qmp_usb_offsets_v6, + + .serdes_tbl = sdx75_usb3_uniphy_serdes_tbl, + .serdes_tbl_num = ARRAY_SIZE(sdx75_usb3_uniphy_serdes_tbl), + .tx_tbl = sdx75_usb3_uniphy_tx_tbl, + .tx_tbl_num = ARRAY_SIZE(sdx75_usb3_uniphy_tx_tbl), + .rx_tbl = sdx75_usb3_uniphy_rx_tbl, + .rx_tbl_num = ARRAY_SIZE(sdx75_usb3_uniphy_rx_tbl), + .pcs_tbl = sdx75_usb3_uniphy_pcs_tbl, + .pcs_tbl_num = ARRAY_SIZE(sdx75_usb3_uniphy_pcs_tbl), + .pcs_usb_tbl = sdx75_usb3_uniphy_pcs_usb_tbl, + .pcs_usb_tbl_num = ARRAY_SIZE(sdx75_usb3_uniphy_pcs_usb_tbl), + .vreg_list = qmp_phy_vreg_l, + .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), + .regs = qmp_v6_usb3phy_regs_layout, + .pcs_usb_offset = 0x1000, + + .has_pwrdn_delay = true, +}; + static const struct qmp_phy_cfg sm8350_usb3_uniphy_cfg = { .lanes = 1, @@ -1582,6 +1900,26 @@ static const struct qmp_phy_cfg qcm2290_usb3phy_cfg = { .regs = qmp_v3_usb3phy_regs_layout_qcm2290, }; +static const struct qmp_phy_cfg x1e80100_usb3_uniphy_cfg = { + .lanes = 1, + + .offsets = &qmp_usb_offsets_v7, + + .serdes_tbl = x1e80100_usb3_uniphy_serdes_tbl, + .serdes_tbl_num = ARRAY_SIZE(x1e80100_usb3_uniphy_serdes_tbl), + .tx_tbl = 
x1e80100_usb3_uniphy_tx_tbl, + .tx_tbl_num = ARRAY_SIZE(x1e80100_usb3_uniphy_tx_tbl), + .rx_tbl = x1e80100_usb3_uniphy_rx_tbl, + .rx_tbl_num = ARRAY_SIZE(x1e80100_usb3_uniphy_rx_tbl), + .pcs_tbl = x1e80100_usb3_uniphy_pcs_tbl, + .pcs_tbl_num = ARRAY_SIZE(x1e80100_usb3_uniphy_pcs_tbl), + .pcs_usb_tbl = x1e80100_usb3_uniphy_pcs_usb_tbl, + .pcs_usb_tbl_num = ARRAY_SIZE(x1e80100_usb3_uniphy_pcs_usb_tbl), + .vreg_list = qmp_phy_vreg_l, + .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), + .regs = qmp_v7_usb3phy_regs_layout, +}; + static void qmp_usb_configure_lane(void __iomem *base, const struct qmp_phy_init_tbl tbl[], int num, @@ -2256,6 +2594,9 @@ static const struct of_device_id qmp_usb_of_match_table[] = { }, { .compatible = "qcom,sdx65-qmp-usb3-uni-phy", .data = &sdx65_usb3_uniphy_cfg, + }, { + .compatible = "qcom,sdx75-qmp-usb3-uni-phy", + .data = &sdx75_usb3_uniphy_cfg, }, { .compatible = "qcom,sm6115-qmp-usb3-phy", .data = &qcm2290_usb3phy_cfg, @@ -2268,6 +2609,9 @@ static const struct of_device_id qmp_usb_of_match_table[] = { }, { .compatible = "qcom,sm8350-qmp-usb3-uni-phy", .data = &sm8350_usb3_uniphy_cfg, + }, { + .compatible = "qcom,x1e80100-qmp-usb3-uni-phy", + .data = &x1e80100_usb3_uniphy_cfg, }, { }, }; diff --git a/drivers/phy/qualcomm/phy-qcom-qmp.h b/drivers/phy/qualcomm/phy-qcom-qmp.h index 71f063f4a56e3d6234792af5332fd4104a6e9e25..6923496cbfee21c2bef6d1fa342254f806dc26fd 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp.h +++ b/drivers/phy/qualcomm/phy-qcom-qmp.h @@ -24,8 +24,12 @@ #include "phy-qcom-qmp-qserdes-com-v6.h" #include "phy-qcom-qmp-qserdes-txrx-v6.h" #include "phy-qcom-qmp-qserdes-txrx-v6_20.h" +#include "phy-qcom-qmp-qserdes-txrx-v6_n4.h" #include "phy-qcom-qmp-qserdes-ln-shrd-v6.h" +#include "phy-qcom-qmp-qserdes-com-v7.h" +#include "phy-qcom-qmp-qserdes-txrx-v7.h" + #include "phy-qcom-qmp-qserdes-pll.h" #include "phy-qcom-qmp-pcs-v2.h" @@ -44,6 +48,8 @@ #include "phy-qcom-qmp-pcs-v6_20.h" +#include "phy-qcom-qmp-pcs-v7.h" + /* Only for QMP V3 
& V4 PHY - DP COM registers */ #define QPHY_V3_DP_COM_PHY_MODE_CTRL 0x00 #define QPHY_V3_DP_COM_SW_RESET 0x04 diff --git a/drivers/phy/renesas/Kconfig b/drivers/phy/renesas/Kconfig index 36505fc5f386e2ca17b3efa8b30c1eb554b3e9a7..e342eef0640b78a47dd766ead8772604a02b24ed 100644 --- a/drivers/phy/renesas/Kconfig +++ b/drivers/phy/renesas/Kconfig @@ -13,7 +13,7 @@ config PHY_R8A779F0_ETHERNET_SERDES config PHY_RCAR_GEN2 tristate "Renesas R-Car generation 2 USB PHY driver" depends on ARCH_RENESAS - depends on GENERIC_PHY + select GENERIC_PHY help Support for USB PHY found on Renesas R-Car generation 2 SoCs. diff --git a/drivers/phy/rockchip/phy-rockchip-inno-usb2.c b/drivers/phy/rockchip/phy-rockchip-inno-usb2.c index a24d2af154df7807adab8070d637860b1b4aa2a9..4f71373ae6e1a3782ffdaf4c32cd2c21befa5908 100644 --- a/drivers/phy/rockchip/phy-rockchip-inno-usb2.c +++ b/drivers/phy/rockchip/phy-rockchip-inno-usb2.c @@ -123,9 +123,12 @@ struct rockchip_chg_det_reg { * @disrise_en: host disconnect rise edge detection enable. * @disrise_st: host disconnect rise edge detection state. * @disrise_clr: host disconnect rise edge detection clear. - * @id_det_en: id detection enable register. - * @id_det_st: id detection state register. - * @id_det_clr: id detection clear register. + * @idfall_det_en: id detection enable register, falling edge + * @idfall_det_st: id detection state register, falling edge + * @idfall_det_clr: id detection clear register, falling edge + * @idrise_det_en: id detection enable register, rising edge + * @idrise_det_st: id detection state register, rising edge + * @idrise_det_clr: id detection clear register, rising edge * @ls_det_en: linestate detection enable register. * @ls_det_st: linestate detection state register. * @ls_det_clr: linestate detection clear register. 
@@ -146,9 +149,12 @@ struct rockchip_usb2phy_port_cfg { struct usb2phy_reg disrise_en; struct usb2phy_reg disrise_st; struct usb2phy_reg disrise_clr; - struct usb2phy_reg id_det_en; - struct usb2phy_reg id_det_st; - struct usb2phy_reg id_det_clr; + struct usb2phy_reg idfall_det_en; + struct usb2phy_reg idfall_det_st; + struct usb2phy_reg idfall_det_clr; + struct usb2phy_reg idrise_det_en; + struct usb2phy_reg idrise_det_st; + struct usb2phy_reg idrise_det_clr; struct usb2phy_reg ls_det_en; struct usb2phy_reg ls_det_st; struct usb2phy_reg ls_det_clr; @@ -488,15 +494,27 @@ static int rockchip_usb2phy_init(struct phy *phy) if (ret) goto out; - /* clear id status and enable id detect irq */ + /* clear id status and enable id detect irqs */ ret = property_enable(rphy->grf, - &rport->port_cfg->id_det_clr, + &rport->port_cfg->idfall_det_clr, true); if (ret) goto out; ret = property_enable(rphy->grf, - &rport->port_cfg->id_det_en, + &rport->port_cfg->idrise_det_clr, + true); + if (ret) + goto out; + + ret = property_enable(rphy->grf, + &rport->port_cfg->idfall_det_en, + true); + if (ret) + goto out; + + ret = property_enable(rphy->grf, + &rport->port_cfg->idrise_det_en, true); if (ret) goto out; @@ -1030,11 +1048,16 @@ static irqreturn_t rockchip_usb2phy_id_irq(int irq, void *data) struct rockchip_usb2phy *rphy = dev_get_drvdata(rport->phy->dev.parent); bool id; - if (!property_enabled(rphy->grf, &rport->port_cfg->id_det_st)) + if (!property_enabled(rphy->grf, &rport->port_cfg->idfall_det_st) && + !property_enabled(rphy->grf, &rport->port_cfg->idrise_det_st)) return IRQ_NONE; /* clear id detect irq pending status */ - property_enable(rphy->grf, &rport->port_cfg->id_det_clr, true); + if (property_enabled(rphy->grf, &rport->port_cfg->idfall_det_st)) + property_enable(rphy->grf, &rport->port_cfg->idfall_det_clr, true); + + if (property_enabled(rphy->grf, &rport->port_cfg->idrise_det_st)) + property_enable(rphy->grf, &rport->port_cfg->idrise_det_clr, true); id = 
property_enabled(rphy->grf, &rport->port_cfg->utmi_id); extcon_set_state_sync(rphy->edev, EXTCON_USB_HOST, !id); @@ -1464,6 +1487,14 @@ put_child: return ret; } +static int rk3128_usb2phy_tuning(struct rockchip_usb2phy *rphy) +{ + /* Turn off differential receiver in suspend mode */ + return regmap_write_bits(rphy->grf, 0x298, + BIT(2) << BIT_WRITEABLE_SHIFT | BIT(2), + BIT(2) << BIT_WRITEABLE_SHIFT | 0); +} + static int rk3588_usb2phy_tuning(struct rockchip_usb2phy *rphy) { int ret; @@ -1513,6 +1544,54 @@ static int rk3588_usb2phy_tuning(struct rockchip_usb2phy *rphy) return ret; } +static const struct rockchip_usb2phy_cfg rk3128_phy_cfgs[] = { + { + .reg = 0x17c, + .num_ports = 2, + .phy_tuning = rk3128_usb2phy_tuning, + .clkout_ctl = { 0x0190, 15, 15, 1, 0 }, + .port_cfgs = { + [USB2PHY_PORT_OTG] = { + .phy_sus = { 0x017c, 8, 0, 0, 0x1d1 }, + .bvalid_det_en = { 0x017c, 14, 14, 0, 1 }, + .bvalid_det_st = { 0x017c, 15, 15, 0, 1 }, + .bvalid_det_clr = { 0x017c, 15, 15, 0, 1 }, + .idfall_det_en = { 0x01a0, 2, 2, 0, 1 }, + .idfall_det_st = { 0x01a0, 3, 3, 0, 1 }, + .idfall_det_clr = { 0x01a0, 3, 3, 0, 1 }, + .idrise_det_en = { 0x01a0, 0, 0, 0, 1 }, + .idrise_det_st = { 0x01a0, 1, 1, 0, 1 }, + .idrise_det_clr = { 0x01a0, 1, 1, 0, 1 }, + .ls_det_en = { 0x017c, 12, 12, 0, 1 }, + .ls_det_st = { 0x017c, 13, 13, 0, 1 }, + .ls_det_clr = { 0x017c, 13, 13, 0, 1 }, + .utmi_bvalid = { 0x014c, 5, 5, 0, 1 }, + .utmi_id = { 0x014c, 8, 8, 0, 1 }, + .utmi_ls = { 0x014c, 7, 6, 0, 1 }, + }, + [USB2PHY_PORT_HOST] = { + .phy_sus = { 0x0194, 8, 0, 0, 0x1d1 }, + .ls_det_en = { 0x0194, 14, 14, 0, 1 }, + .ls_det_st = { 0x0194, 15, 15, 0, 1 }, + .ls_det_clr = { 0x0194, 15, 15, 0, 1 } + } + }, + .chg_det = { + .opmode = { 0x017c, 3, 0, 5, 1 }, + .cp_det = { 0x02c0, 6, 6, 0, 1 }, + .dcp_det = { 0x02c0, 5, 5, 0, 1 }, + .dp_det = { 0x02c0, 7, 7, 0, 1 }, + .idm_sink_en = { 0x0184, 8, 8, 0, 1 }, + .idp_sink_en = { 0x0184, 7, 7, 0, 1 }, + .idp_src_en = { 0x0184, 9, 9, 0, 1 }, + .rdm_pdwn_en = { 
0x0184, 10, 10, 0, 1 }, + .vdm_src_en = { 0x0184, 12, 12, 0, 1 }, + .vdp_src_en = { 0x0184, 11, 11, 0, 1 }, + }, + }, + { /* sentinel */ } +}; + static const struct rockchip_usb2phy_cfg rk3228_phy_cfgs[] = { { .reg = 0x760, @@ -1524,9 +1603,12 @@ static const struct rockchip_usb2phy_cfg rk3228_phy_cfgs[] = { .bvalid_det_en = { 0x0680, 3, 3, 0, 1 }, .bvalid_det_st = { 0x0690, 3, 3, 0, 1 }, .bvalid_det_clr = { 0x06a0, 3, 3, 0, 1 }, - .id_det_en = { 0x0680, 6, 5, 0, 3 }, - .id_det_st = { 0x0690, 6, 5, 0, 3 }, - .id_det_clr = { 0x06a0, 6, 5, 0, 3 }, + .idfall_det_en = { 0x0680, 6, 6, 0, 1 }, + .idfall_det_st = { 0x0690, 6, 6, 0, 1 }, + .idfall_det_clr = { 0x06a0, 6, 6, 0, 1 }, + .idrise_det_en = { 0x0680, 5, 5, 0, 1 }, + .idrise_det_st = { 0x0690, 5, 5, 0, 1 }, + .idrise_det_clr = { 0x06a0, 5, 5, 0, 1 }, .ls_det_en = { 0x0680, 2, 2, 0, 1 }, .ls_det_st = { 0x0690, 2, 2, 0, 1 }, .ls_det_clr = { 0x06a0, 2, 2, 0, 1 }, @@ -1587,9 +1669,12 @@ static const struct rockchip_usb2phy_cfg rk3308_phy_cfgs[] = { .bvalid_det_en = { 0x3020, 3, 2, 0, 3 }, .bvalid_det_st = { 0x3024, 3, 2, 0, 3 }, .bvalid_det_clr = { 0x3028, 3, 2, 0, 3 }, - .id_det_en = { 0x3020, 5, 4, 0, 3 }, - .id_det_st = { 0x3024, 5, 4, 0, 3 }, - .id_det_clr = { 0x3028, 5, 4, 0, 3 }, + .idfall_det_en = { 0x3020, 5, 5, 0, 1 }, + .idfall_det_st = { 0x3024, 5, 5, 0, 1 }, + .idfall_det_clr = { 0x3028, 5, 5, 0, 1 }, + .idrise_det_en = { 0x3020, 4, 4, 0, 1 }, + .idrise_det_st = { 0x3024, 4, 4, 0, 1 }, + .idrise_det_clr = { 0x3028, 4, 4, 0, 1 }, .ls_det_en = { 0x3020, 0, 0, 0, 1 }, .ls_det_st = { 0x3024, 0, 0, 0, 1 }, .ls_det_clr = { 0x3028, 0, 0, 0, 1 }, @@ -1634,9 +1719,12 @@ static const struct rockchip_usb2phy_cfg rk3328_phy_cfgs[] = { .bvalid_det_en = { 0x0110, 3, 2, 0, 3 }, .bvalid_det_st = { 0x0114, 3, 2, 0, 3 }, .bvalid_det_clr = { 0x0118, 3, 2, 0, 3 }, - .id_det_en = { 0x0110, 5, 4, 0, 3 }, - .id_det_st = { 0x0114, 5, 4, 0, 3 }, - .id_det_clr = { 0x0118, 5, 4, 0, 3 }, + .idfall_det_en = { 0x0110, 5, 5, 0, 1 }, + 
.idfall_det_st = { 0x0114, 5, 5, 0, 1 }, + .idfall_det_clr = { 0x0118, 5, 5, 0, 1 }, + .idrise_det_en = { 0x0110, 4, 4, 0, 1 }, + .idrise_det_st = { 0x0114, 4, 4, 0, 1 }, + .idrise_det_clr = { 0x0118, 4, 4, 0, 1 }, .ls_det_en = { 0x0110, 0, 0, 0, 1 }, .ls_det_st = { 0x0114, 0, 0, 0, 1 }, .ls_det_clr = { 0x0118, 0, 0, 0, 1 }, @@ -1700,9 +1788,12 @@ static const struct rockchip_usb2phy_cfg rk3399_phy_cfgs[] = { .bvalid_det_en = { 0xe3c0, 3, 3, 0, 1 }, .bvalid_det_st = { 0xe3e0, 3, 3, 0, 1 }, .bvalid_det_clr = { 0xe3d0, 3, 3, 0, 1 }, - .id_det_en = { 0xe3c0, 5, 4, 0, 3 }, - .id_det_st = { 0xe3e0, 5, 4, 0, 3 }, - .id_det_clr = { 0xe3d0, 5, 4, 0, 3 }, + .idfall_det_en = { 0xe3c0, 5, 5, 0, 1 }, + .idfall_det_st = { 0xe3e0, 5, 5, 0, 1 }, + .idfall_det_clr = { 0xe3d0, 5, 5, 0, 1 }, + .idrise_det_en = { 0xe3c0, 4, 4, 0, 1 }, + .idrise_det_st = { 0xe3e0, 4, 4, 0, 1 }, + .idrise_det_clr = { 0xe3d0, 4, 4, 0, 1 }, .utmi_avalid = { 0xe2ac, 7, 7, 0, 1 }, .utmi_bvalid = { 0xe2ac, 12, 12, 0, 1 }, .utmi_id = { 0xe2ac, 8, 8, 0, 1 }, @@ -1739,9 +1830,12 @@ static const struct rockchip_usb2phy_cfg rk3399_phy_cfgs[] = { .bvalid_det_en = { 0xe3c0, 8, 8, 0, 1 }, .bvalid_det_st = { 0xe3e0, 8, 8, 0, 1 }, .bvalid_det_clr = { 0xe3d0, 8, 8, 0, 1 }, - .id_det_en = { 0xe3c0, 10, 9, 0, 3 }, - .id_det_st = { 0xe3e0, 10, 9, 0, 3 }, - .id_det_clr = { 0xe3d0, 10, 9, 0, 3 }, + .idfall_det_en = { 0xe3c0, 10, 10, 0, 1 }, + .idfall_det_st = { 0xe3e0, 10, 10, 0, 1 }, + .idfall_det_clr = { 0xe3d0, 10, 10, 0, 1 }, + .idrise_det_en = { 0xe3c0, 9, 9, 0, 1 }, + .idrise_det_st = { 0xe3e0, 9, 9, 0, 1 }, + .idrise_det_clr = { 0xe3d0, 9, 9, 0, 1 }, .utmi_avalid = { 0xe2ac, 10, 10, 0, 1 }, .utmi_bvalid = { 0xe2ac, 16, 16, 0, 1 }, .utmi_id = { 0xe2ac, 11, 11, 0, 1 }, @@ -1770,9 +1864,12 @@ static const struct rockchip_usb2phy_cfg rk3568_phy_cfgs[] = { .bvalid_det_en = { 0x0080, 3, 2, 0, 3 }, .bvalid_det_st = { 0x0084, 3, 2, 0, 3 }, .bvalid_det_clr = { 0x0088, 3, 2, 0, 3 }, - .id_det_en = { 0x0080, 5, 4, 0, 3 }, - 
.id_det_st = { 0x0084, 5, 4, 0, 3 }, - .id_det_clr = { 0x0088, 5, 4, 0, 3 }, + .idfall_det_en = { 0x0080, 5, 5, 0, 1 }, + .idfall_det_st = { 0x0084, 5, 5, 0, 1 }, + .idfall_det_clr = { 0x0088, 5, 5, 0, 1 }, + .idrise_det_en = { 0x0080, 4, 4, 0, 1 }, + .idrise_det_st = { 0x0084, 4, 4, 0, 1 }, + .idrise_det_clr = { 0x0088, 4, 4, 0, 1 }, .utmi_avalid = { 0x00c0, 10, 10, 0, 1 }, .utmi_bvalid = { 0x00c0, 9, 9, 0, 1 }, .utmi_id = { 0x00c0, 6, 6, 0, 1 }, @@ -1990,6 +2087,7 @@ static const struct rockchip_usb2phy_cfg rv1108_phy_cfgs[] = { static const struct of_device_id rockchip_usb2phy_dt_match[] = { { .compatible = "rockchip,px30-usb2phy", .data = &rk3328_phy_cfgs }, + { .compatible = "rockchip,rk3128-usb2phy", .data = &rk3128_phy_cfgs }, { .compatible = "rockchip,rk3228-usb2phy", .data = &rk3228_phy_cfgs }, { .compatible = "rockchip,rk3308-usb2phy", .data = &rk3308_phy_cfgs }, { .compatible = "rockchip,rk3328-usb2phy", .data = &rk3328_phy_cfgs }, diff --git a/drivers/phy/ti/phy-gmii-sel.c b/drivers/phy/ti/phy-gmii-sel.c index bc847d3879f79c0684693a475a3c4664f2792794..0f4818adb440022d0d9b21a723e36ca062091268 100644 --- a/drivers/phy/ti/phy-gmii-sel.c +++ b/drivers/phy/ti/phy-gmii-sel.c @@ -248,7 +248,7 @@ static const struct phy_gmii_sel_soc_data phy_gmii_sel_cpsw9g_soc_j784s4 = { .use_of_data = true, .regfields = phy_gmii_sel_fields_am654, - .extra_modes = BIT(PHY_INTERFACE_MODE_QSGMII) | + .extra_modes = BIT(PHY_INTERFACE_MODE_QSGMII) | BIT(PHY_INTERFACE_MODE_SGMII) | BIT(PHY_INTERFACE_MODE_USXGMII), .num_ports = 8, .num_qsgmii_main_ports = 2, diff --git a/drivers/phy/ti/phy-j721e-wiz.c b/drivers/phy/ti/phy-j721e-wiz.c index fc3cd98c60ff42e1a0aa30698e68319b49009063..00d7e6a6de03a2cb85c26eeba29f90d57096f592 100644 --- a/drivers/phy/ti/phy-j721e-wiz.c +++ b/drivers/phy/ti/phy-j721e-wiz.c @@ -1240,6 +1240,7 @@ static int wiz_phy_fullrt_div(struct wiz *wiz, int lane) case J721E_WIZ_10G: case J7200_WIZ_10G: case J721S2_WIZ_10G: + case J784S4_WIZ_10G: if 
(wiz->lane_phy_type[lane] == PHY_TYPE_SGMII) return regmap_field_write(wiz->p0_fullrt_div[lane], 0x2); break; diff --git a/drivers/power/reset/as3722-poweroff.c b/drivers/power/reset/as3722-poweroff.c index 829e0dba2fda3beca8f52b00d4f5e12add8c0803..ab3350ce2d6214416a211e3fe3cf11936164688f 100644 --- a/drivers/power/reset/as3722-poweroff.c +++ b/drivers/power/reset/as3722-poweroff.c @@ -61,13 +61,11 @@ static int as3722_poweroff_probe(struct platform_device *pdev) return 0; } -static int as3722_poweroff_remove(struct platform_device *pdev) +static void as3722_poweroff_remove(struct platform_device *pdev) { if (pm_power_off == as3722_pm_power_off) pm_power_off = NULL; as3722_pm_poweroff = NULL; - - return 0; } static struct platform_driver as3722_poweroff_driver = { @@ -75,7 +73,7 @@ static struct platform_driver as3722_poweroff_driver = { .name = "as3722-power-off", }, .probe = as3722_poweroff_probe, - .remove = as3722_poweroff_remove, + .remove_new = as3722_poweroff_remove, }; module_platform_driver(as3722_poweroff_driver); diff --git a/drivers/power/reset/at91-poweroff.c b/drivers/power/reset/at91-poweroff.c index dd5399785b6917a3d1e40a878411283eb2c0fce2..93eece0278652207b5fdfd597268f7ef1beaf867 100644 --- a/drivers/power/reset/at91-poweroff.c +++ b/drivers/power/reset/at91-poweroff.c @@ -57,7 +57,7 @@ static struct shdwc { void __iomem *mpddrc_base; } at91_shdwc; -static void __init at91_wakeup_status(struct platform_device *pdev) +static void at91_wakeup_status(struct platform_device *pdev) { const char *reason; u32 reg = readl(at91_shdwc.shdwc_base + AT91_SHDW_SR); @@ -149,7 +149,7 @@ static void at91_poweroff_dt_set_wakeup_mode(struct platform_device *pdev) writel(wakeup_mode | mode, at91_shdwc.shdwc_base + AT91_SHDW_MR); } -static int __init at91_poweroff_probe(struct platform_device *pdev) +static int at91_poweroff_probe(struct platform_device *pdev) { struct device_node *np; u32 ddr_type; @@ -202,7 +202,7 @@ clk_disable: return ret; } -static int __exit 
at91_poweroff_remove(struct platform_device *pdev) +static void at91_poweroff_remove(struct platform_device *pdev) { if (pm_power_off == at91_poweroff) pm_power_off = NULL; @@ -211,8 +211,6 @@ static int __exit at91_poweroff_remove(struct platform_device *pdev) iounmap(at91_shdwc.mpddrc_base); clk_disable_unprepare(at91_shdwc.sclk); - - return 0; } static const struct of_device_id at91_poweroff_of_match[] = { @@ -224,13 +222,14 @@ static const struct of_device_id at91_poweroff_of_match[] = { MODULE_DEVICE_TABLE(of, at91_poweroff_of_match); static struct platform_driver at91_poweroff_driver = { - .remove = __exit_p(at91_poweroff_remove), + .probe = at91_poweroff_probe, + .remove_new = at91_poweroff_remove, .driver = { .name = "at91-poweroff", .of_match_table = at91_poweroff_of_match, }, }; -module_platform_driver_probe(at91_poweroff_driver, at91_poweroff_probe); +module_platform_driver(at91_poweroff_driver); MODULE_AUTHOR("Atmel Corporation"); MODULE_DESCRIPTION("Shutdown driver for Atmel SoCs"); diff --git a/drivers/power/reset/at91-reset.c b/drivers/power/reset/at91-reset.c index aa9b012d3d00b8489b4a82539aad03157987b328..16512654295f5c4007860d01859f42154177ec70 100644 --- a/drivers/power/reset/at91-reset.c +++ b/drivers/power/reset/at91-reset.c @@ -337,7 +337,7 @@ static int at91_rcdev_init(struct at91_reset *reset, return devm_reset_controller_register(&pdev->dev, &reset->rcdev); } -static int __init at91_reset_probe(struct platform_device *pdev) +static int at91_reset_probe(struct platform_device *pdev) { const struct of_device_id *match; struct at91_reset *reset; @@ -417,24 +417,23 @@ disable_clk: return ret; } -static int __exit at91_reset_remove(struct platform_device *pdev) +static void at91_reset_remove(struct platform_device *pdev) { struct at91_reset *reset = platform_get_drvdata(pdev); unregister_restart_handler(&reset->nb); clk_disable_unprepare(reset->sclk); - - return 0; } static struct platform_driver at91_reset_driver = { - .remove = 
__exit_p(at91_reset_remove), + .probe = at91_reset_probe, + .remove_new = at91_reset_remove, .driver = { .name = "at91-reset", .of_match_table = at91_reset_of_match, }, }; -module_platform_driver_probe(at91_reset_driver, at91_reset_probe); +module_platform_driver(at91_reset_driver); MODULE_AUTHOR("Atmel Corporation"); MODULE_DESCRIPTION("Reset driver for Atmel SoCs"); diff --git a/drivers/power/reset/at91-sama5d2_shdwc.c b/drivers/power/reset/at91-sama5d2_shdwc.c index e76b102b57b1fc9c3d5fc274f35520b55158269c..959ce0dbe91d112d006176bd36165cf208e4f810 100644 --- a/drivers/power/reset/at91-sama5d2_shdwc.c +++ b/drivers/power/reset/at91-sama5d2_shdwc.c @@ -107,7 +107,7 @@ static const unsigned long long sdwc_dbc_period[] = { 0, 3, 32, 512, 4096, 32768, }; -static void __init at91_wakeup_status(struct platform_device *pdev) +static void at91_wakeup_status(struct platform_device *pdev) { struct shdwc *shdw = platform_get_drvdata(pdev); const struct reg_config *rcfg = shdw->rcfg; @@ -329,7 +329,7 @@ static const struct of_device_id at91_pmc_ids[] = { { /* Sentinel. 
*/ } }; -static int __init at91_shdwc_probe(struct platform_device *pdev) +static int at91_shdwc_probe(struct platform_device *pdev) { const struct of_device_id *match; struct device_node *np; @@ -421,7 +421,7 @@ clk_disable: return ret; } -static int __exit at91_shdwc_remove(struct platform_device *pdev) +static void at91_shdwc_remove(struct platform_device *pdev) { struct shdwc *shdw = platform_get_drvdata(pdev); @@ -437,18 +437,17 @@ static int __exit at91_shdwc_remove(struct platform_device *pdev) iounmap(shdw->pmc_base); clk_disable_unprepare(shdw->sclk); - - return 0; } static struct platform_driver at91_shdwc_driver = { - .remove = __exit_p(at91_shdwc_remove), + .probe = at91_shdwc_probe, + .remove_new = at91_shdwc_remove, .driver = { .name = "at91-shdwc", .of_match_table = at91_shdwc_of_match, }, }; -module_platform_driver_probe(at91_shdwc_driver, at91_shdwc_probe); +module_platform_driver(at91_shdwc_driver); MODULE_AUTHOR("Nicolas Ferre "); MODULE_DESCRIPTION("Atmel shutdown controller driver"); diff --git a/drivers/power/reset/atc260x-poweroff.c b/drivers/power/reset/atc260x-poweroff.c index 98f20251a6d18d7cd590ef5d9bd02df5301e0800..b4aa50e9685e1fbb496901a4d75bd9bce64779cb 100644 --- a/drivers/power/reset/atc260x-poweroff.c +++ b/drivers/power/reset/atc260x-poweroff.c @@ -233,7 +233,7 @@ static int atc260x_pwrc_probe(struct platform_device *pdev) return ret; } -static int atc260x_pwrc_remove(struct platform_device *pdev) +static void atc260x_pwrc_remove(struct platform_device *pdev) { struct atc260x_pwrc *priv = platform_get_drvdata(pdev); @@ -243,13 +243,11 @@ static int atc260x_pwrc_remove(struct platform_device *pdev) } unregister_restart_handler(&priv->restart_nb); - - return 0; } static struct platform_driver atc260x_pwrc_driver = { .probe = atc260x_pwrc_probe, - .remove = atc260x_pwrc_remove, + .remove_new = atc260x_pwrc_remove, .driver = { .name = "atc260x-pwrc", }, diff --git a/drivers/power/reset/gpio-restart.c 
b/drivers/power/reset/gpio-restart.c index 3aa19765772dce4bbe8b8a39dc27725cc8fd12b0..d1e177176fa1f157fae75f1804d5436f19a13c6d 100644 --- a/drivers/power/reset/gpio-restart.c +++ b/drivers/power/reset/gpio-restart.c @@ -17,17 +17,14 @@ struct gpio_restart { struct gpio_desc *reset_gpio; - struct notifier_block restart_handler; u32 active_delay_ms; u32 inactive_delay_ms; u32 wait_delay_ms; }; -static int gpio_restart_notify(struct notifier_block *this, - unsigned long mode, void *cmd) +static int gpio_restart_notify(struct sys_off_data *data) { - struct gpio_restart *gpio_restart = - container_of(this, struct gpio_restart, restart_handler); + struct gpio_restart *gpio_restart = data->cb_data; /* drive it active, also inactive->active edge */ gpiod_direction_output(gpio_restart->reset_gpio, 1); @@ -52,6 +49,7 @@ static int gpio_restart_probe(struct platform_device *pdev) { struct gpio_restart *gpio_restart; bool open_source = false; + int priority = 129; u32 property; int ret; @@ -71,8 +69,6 @@ static int gpio_restart_probe(struct platform_device *pdev) return ret; } - gpio_restart->restart_handler.notifier_call = gpio_restart_notify; - gpio_restart->restart_handler.priority = 129; gpio_restart->active_delay_ms = 100; gpio_restart->inactive_delay_ms = 100; gpio_restart->wait_delay_ms = 3000; @@ -83,7 +79,7 @@ static int gpio_restart_probe(struct platform_device *pdev) dev_err(&pdev->dev, "Invalid priority property: %u\n", property); else - gpio_restart->restart_handler.priority = property; + priority = property; } of_property_read_u32(pdev->dev.of_node, "active-delay", @@ -93,9 +89,11 @@ static int gpio_restart_probe(struct platform_device *pdev) of_property_read_u32(pdev->dev.of_node, "wait-delay", &gpio_restart->wait_delay_ms); - platform_set_drvdata(pdev, gpio_restart); - - ret = register_restart_handler(&gpio_restart->restart_handler); + ret = devm_register_sys_off_handler(&pdev->dev, + SYS_OFF_MODE_RESTART, + priority, + gpio_restart_notify, + gpio_restart); if 
(ret) { dev_err(&pdev->dev, "%s: cannot register restart handler, %d\n", __func__, ret); @@ -105,19 +103,6 @@ static int gpio_restart_probe(struct platform_device *pdev) return 0; } -static void gpio_restart_remove(struct platform_device *pdev) -{ - struct gpio_restart *gpio_restart = platform_get_drvdata(pdev); - int ret; - - ret = unregister_restart_handler(&gpio_restart->restart_handler); - if (ret) { - dev_err(&pdev->dev, - "%s: cannot unregister restart handler, %d\n", - __func__, ret); - } -} - static const struct of_device_id of_gpio_restart_match[] = { { .compatible = "gpio-restart", }, {}, @@ -125,7 +110,6 @@ static const struct of_device_id of_gpio_restart_match[] = { static struct platform_driver gpio_restart_driver = { .probe = gpio_restart_probe, - .remove_new = gpio_restart_remove, .driver = { .name = "restart-gpio", .of_match_table = of_gpio_restart_match, diff --git a/drivers/power/reset/ltc2952-poweroff.c b/drivers/power/reset/ltc2952-poweroff.c index eea05921a054b54e7543c8d549a08a79763fe912..fa25fbd5393433930845cc42de6bd91ea7cb8b81 100644 --- a/drivers/power/reset/ltc2952-poweroff.c +++ b/drivers/power/reset/ltc2952-poweroff.c @@ -286,7 +286,7 @@ static int ltc2952_poweroff_probe(struct platform_device *pdev) return 0; } -static int ltc2952_poweroff_remove(struct platform_device *pdev) +static void ltc2952_poweroff_remove(struct platform_device *pdev) { struct ltc2952_poweroff *data = platform_get_drvdata(pdev); @@ -295,7 +295,6 @@ static int ltc2952_poweroff_remove(struct platform_device *pdev) hrtimer_cancel(&data->timer_wde); atomic_notifier_chain_unregister(&panic_notifier_list, &data->panic_notifier); - return 0; } static const struct of_device_id of_ltc2952_poweroff_match[] = { @@ -306,7 +305,7 @@ MODULE_DEVICE_TABLE(of, of_ltc2952_poweroff_match); static struct platform_driver ltc2952_poweroff_driver = { .probe = ltc2952_poweroff_probe, - .remove = ltc2952_poweroff_remove, + .remove_new = ltc2952_poweroff_remove, .driver = { .name = 
"ltc2952-poweroff", .of_match_table = of_ltc2952_poweroff_match, diff --git a/drivers/power/reset/mt6323-poweroff.c b/drivers/power/reset/mt6323-poweroff.c index 108167f7738bbca05b8d8443cb146314f44746c1..57a63c0ab7fb702e4c15aea74f3d5b36d21b57fd 100644 --- a/drivers/power/reset/mt6323-poweroff.c +++ b/drivers/power/reset/mt6323-poweroff.c @@ -70,12 +70,10 @@ static int mt6323_pwrc_probe(struct platform_device *pdev) return 0; } -static int mt6323_pwrc_remove(struct platform_device *pdev) +static void mt6323_pwrc_remove(struct platform_device *pdev) { if (pm_power_off == &mt6323_do_pwroff) pm_power_off = NULL; - - return 0; } static const struct of_device_id mt6323_pwrc_dt_match[] = { @@ -86,7 +84,7 @@ MODULE_DEVICE_TABLE(of, mt6323_pwrc_dt_match); static struct platform_driver mt6323_pwrc_driver = { .probe = mt6323_pwrc_probe, - .remove = mt6323_pwrc_remove, + .remove_new = mt6323_pwrc_remove, .driver = { .name = "mt6323-pwrc", .of_match_table = mt6323_pwrc_dt_match, diff --git a/drivers/power/reset/pwr-mlxbf.c b/drivers/power/reset/pwr-mlxbf.c index de35d24bb7ef3edcf22afbdf597ad3436cba0ae5..1775b318d0ef4187cd96031a5a83af3b1e94358a 100644 --- a/drivers/power/reset/pwr-mlxbf.c +++ b/drivers/power/reset/pwr-mlxbf.c @@ -17,11 +17,17 @@ #include struct pwr_mlxbf { - struct work_struct send_work; + struct work_struct reboot_work; + struct work_struct shutdown_work; const char *hid; }; -static void pwr_mlxbf_send_work(struct work_struct *work) +static void pwr_mlxbf_reboot_work(struct work_struct *work) +{ + acpi_bus_generate_netlink_event("button/reboot.*", "Reboot Button", 0x80, 1); +} + +static void pwr_mlxbf_shutdown_work(struct work_struct *work) { acpi_bus_generate_netlink_event("button/power.*", "Power Button", 0x80, 1); } @@ -33,10 +39,10 @@ static irqreturn_t pwr_mlxbf_irq(int irq, void *ptr) struct pwr_mlxbf *priv = ptr; if (!strncmp(priv->hid, rst_pwr_hid, 8)) - emergency_restart(); + schedule_work(&priv->reboot_work); if (!strncmp(priv->hid, low_pwr_hid, 8)) - 
schedule_work(&priv->send_work); + schedule_work(&priv->shutdown_work); return IRQ_HANDLED; } @@ -64,7 +70,11 @@ static int pwr_mlxbf_probe(struct platform_device *pdev) if (irq < 0) return dev_err_probe(dev, irq, "Error getting %s irq.\n", priv->hid); - err = devm_work_autocancel(dev, &priv->send_work, pwr_mlxbf_send_work); + err = devm_work_autocancel(dev, &priv->shutdown_work, pwr_mlxbf_shutdown_work); + if (err) + return err; + + err = devm_work_autocancel(dev, &priv->reboot_work, pwr_mlxbf_reboot_work); if (err) return err; diff --git a/drivers/power/reset/qnap-poweroff.c b/drivers/power/reset/qnap-poweroff.c index 0ddf7f25f7b8749cf92c800498efd37095c97553..e0f2ff6b147c19a932d600513f652b42df1b03fd 100644 --- a/drivers/power/reset/qnap-poweroff.c +++ b/drivers/power/reset/qnap-poweroff.c @@ -111,15 +111,14 @@ static int qnap_power_off_probe(struct platform_device *pdev) return 0; } -static int qnap_power_off_remove(struct platform_device *pdev) +static void qnap_power_off_remove(struct platform_device *pdev) { pm_power_off = NULL; - return 0; } static struct platform_driver qnap_power_off_driver = { .probe = qnap_power_off_probe, - .remove = qnap_power_off_remove, + .remove_new = qnap_power_off_remove, .driver = { .name = "qnap_power_off", .of_match_table = of_match_ptr(qnap_power_off_of_match_table), diff --git a/drivers/power/reset/regulator-poweroff.c b/drivers/power/reset/regulator-poweroff.c index 7f87fbb8b051e23cc17f107efc405302223cc341..15160809c423a5d4e67fa07cea9f9b1c20cdd06f 100644 --- a/drivers/power/reset/regulator-poweroff.c +++ b/drivers/power/reset/regulator-poweroff.c @@ -52,12 +52,10 @@ static int regulator_poweroff_probe(struct platform_device *pdev) return 0; } -static int regulator_poweroff_remove(__maybe_unused struct platform_device *pdev) +static void regulator_poweroff_remove(struct platform_device *pdev) { if (pm_power_off == ®ulator_poweroff_do_poweroff) pm_power_off = NULL; - - return 0; } static const struct of_device_id 
of_regulator_poweroff_match[] = { @@ -68,7 +66,7 @@ MODULE_DEVICE_TABLE(of, of_regulator_poweroff_match); static struct platform_driver regulator_poweroff_driver = { .probe = regulator_poweroff_probe, - .remove = regulator_poweroff_remove, + .remove_new = regulator_poweroff_remove, .driver = { .name = "poweroff-regulator", .of_match_table = of_regulator_poweroff_match, diff --git a/drivers/power/reset/restart-poweroff.c b/drivers/power/reset/restart-poweroff.c index 28f1822db162610c2b7c7a5dc4a907a31af43224..f4d6004793d3aa0cd5af28fa05e2de5df234c812 100644 --- a/drivers/power/reset/restart-poweroff.c +++ b/drivers/power/reset/restart-poweroff.c @@ -33,12 +33,10 @@ static int restart_poweroff_probe(struct platform_device *pdev) return 0; } -static int restart_poweroff_remove(struct platform_device *pdev) +static void restart_poweroff_remove(struct platform_device *pdev) { if (pm_power_off == &restart_poweroff_do_poweroff) pm_power_off = NULL; - - return 0; } static const struct of_device_id of_restart_poweroff_match[] = { @@ -49,7 +47,7 @@ MODULE_DEVICE_TABLE(of, of_restart_poweroff_match); static struct platform_driver restart_poweroff_driver = { .probe = restart_poweroff_probe, - .remove = restart_poweroff_remove, + .remove_new = restart_poweroff_remove, .driver = { .name = "poweroff-restart", .of_match_table = of_restart_poweroff_match, diff --git a/drivers/power/reset/rmobile-reset.c b/drivers/power/reset/rmobile-reset.c index bd3b396558e0df8c469ddc18869620289885665e..5df9b41c68c79cc93f9f1a28dc4e3ec85522b992 100644 --- a/drivers/power/reset/rmobile-reset.c +++ b/drivers/power/reset/rmobile-reset.c @@ -59,11 +59,10 @@ fail_unmap: return error; } -static int rmobile_reset_remove(struct platform_device *pdev) +static void rmobile_reset_remove(struct platform_device *pdev) { unregister_restart_handler(&rmobile_reset_nb); iounmap(sysc_base2); - return 0; } static const struct of_device_id rmobile_reset_of_match[] = { @@ -74,7 +73,7 @@ MODULE_DEVICE_TABLE(of, 
rmobile_reset_of_match); static struct platform_driver rmobile_reset_driver = { .probe = rmobile_reset_probe, - .remove = rmobile_reset_remove, + .remove_new = rmobile_reset_remove, .driver = { .name = "rmobile_reset", .of_match_table = rmobile_reset_of_match, diff --git a/drivers/power/reset/syscon-poweroff.c b/drivers/power/reset/syscon-poweroff.c index c3aab7f59345a502a31713b54682dbf960dec10f..1b2ce7734260c7170803371449b94e9e53ce7677 100644 --- a/drivers/power/reset/syscon-poweroff.c +++ b/drivers/power/reset/syscon-poweroff.c @@ -76,12 +76,10 @@ static int syscon_poweroff_probe(struct platform_device *pdev) return 0; } -static int syscon_poweroff_remove(struct platform_device *pdev) +static void syscon_poweroff_remove(struct platform_device *pdev) { if (pm_power_off == syscon_poweroff) pm_power_off = NULL; - - return 0; } static const struct of_device_id syscon_poweroff_of_match[] = { @@ -91,7 +89,7 @@ static const struct of_device_id syscon_poweroff_of_match[] = { static struct platform_driver syscon_poweroff_driver = { .probe = syscon_poweroff_probe, - .remove = syscon_poweroff_remove, + .remove_new = syscon_poweroff_remove, .driver = { .name = "syscon-poweroff", .of_match_table = syscon_poweroff_of_match, diff --git a/drivers/power/reset/tps65086-restart.c b/drivers/power/reset/tps65086-restart.c index 5ec819eac7da4d1b6535308e6551fa72b3dfe1b7..ee8e9f4b837eaee09f2224c6cda81cfe08f296f8 100644 --- a/drivers/power/reset/tps65086-restart.c +++ b/drivers/power/reset/tps65086-restart.c @@ -62,19 +62,21 @@ static int tps65086_restart_probe(struct platform_device *pdev) return 0; } -static int tps65086_restart_remove(struct platform_device *pdev) +static void tps65086_restart_remove(struct platform_device *pdev) { struct tps65086_restart *tps65086_restart = platform_get_drvdata(pdev); int ret; ret = unregister_restart_handler(&tps65086_restart->handler); if (ret) { + /* + * tps65086_restart_probe() registered the restart handler. 
So + * unregistering should work fine. Checking the error code + * shouldn't be needed, still doing it for completeness. + */ dev_err(&pdev->dev, "%s: cannot unregister restart handler: %d\n", __func__, ret); - return -ENODEV; } - - return 0; } static const struct platform_device_id tps65086_restart_id_table[] = { @@ -88,7 +90,7 @@ static struct platform_driver tps65086_restart_driver = { .name = "tps65086-restart", }, .probe = tps65086_restart_probe, - .remove = tps65086_restart_remove, + .remove_new = tps65086_restart_remove, .id_table = tps65086_restart_id_table, }; module_platform_driver(tps65086_restart_driver); diff --git a/drivers/power/supply/bq24190_charger.c b/drivers/power/supply/bq24190_charger.c index 1db290ee2591adef9e89437eec0dde519e958675..2b393eb5c2820e18d6244fad53efc6ef689613de 100644 --- a/drivers/power/supply/bq24190_charger.c +++ b/drivers/power/supply/bq24190_charger.c @@ -36,10 +36,16 @@ #define BQ24190_REG_POC_WDT_RESET_SHIFT 6 #define BQ24190_REG_POC_CHG_CONFIG_MASK (BIT(5) | BIT(4)) #define BQ24190_REG_POC_CHG_CONFIG_SHIFT 4 -#define BQ24190_REG_POC_CHG_CONFIG_DISABLE 0x0 -#define BQ24190_REG_POC_CHG_CONFIG_CHARGE 0x1 -#define BQ24190_REG_POC_CHG_CONFIG_OTG 0x2 -#define BQ24190_REG_POC_CHG_CONFIG_OTG_ALT 0x3 +#define BQ24190_REG_POC_CHG_CONFIG_DISABLE 0x0 +#define BQ24190_REG_POC_CHG_CONFIG_CHARGE 0x1 +#define BQ24190_REG_POC_CHG_CONFIG_OTG 0x2 +#define BQ24190_REG_POC_CHG_CONFIG_OTG_ALT 0x3 +#define BQ24296_REG_POC_OTG_CONFIG_MASK BIT(5) +#define BQ24296_REG_POC_OTG_CONFIG_SHIFT 5 +#define BQ24296_REG_POC_CHG_CONFIG_MASK BIT(4) +#define BQ24296_REG_POC_CHG_CONFIG_SHIFT 4 +#define BQ24296_REG_POC_OTG_CONFIG_DISABLE 0x0 +#define BQ24296_REG_POC_OTG_CONFIG_OTG 0x1 #define BQ24190_REG_POC_SYS_MIN_MASK (BIT(3) | BIT(2) | BIT(1)) #define BQ24190_REG_POC_SYS_MIN_SHIFT 1 #define BQ24190_REG_POC_SYS_MIN_MIN 3000 @@ -134,58 +140,23 @@ #define BQ24190_REG_F_BAT_FAULT_SHIFT 3 #define BQ24190_REG_F_NTC_FAULT_MASK (BIT(2) | BIT(1) | BIT(0)) #define 
BQ24190_REG_F_NTC_FAULT_SHIFT 0 +#define BQ24296_REG_F_NTC_FAULT_MASK (BIT(1) | BIT(0)) +#define BQ24296_REG_F_NTC_FAULT_SHIFT 0 #define BQ24190_REG_VPRS 0x0A /* Vendor/Part/Revision Status */ #define BQ24190_REG_VPRS_PN_MASK (BIT(5) | BIT(4) | BIT(3)) #define BQ24190_REG_VPRS_PN_SHIFT 3 -#define BQ24190_REG_VPRS_PN_24190 0x4 -#define BQ24190_REG_VPRS_PN_24192 0x5 /* Also 24193, 24196 */ -#define BQ24190_REG_VPRS_PN_24192I 0x3 +#define BQ24190_REG_VPRS_PN_24190 0x4 +#define BQ24190_REG_VPRS_PN_24192 0x5 /* Also 24193, 24196 */ +#define BQ24190_REG_VPRS_PN_24192I 0x3 +#define BQ24296_REG_VPRS_PN_MASK (BIT(7) | BIT(6) | BIT(5)) +#define BQ24296_REG_VPRS_PN_SHIFT 5 +#define BQ24296_REG_VPRS_PN_24296 0x1 #define BQ24190_REG_VPRS_TS_PROFILE_MASK BIT(2) #define BQ24190_REG_VPRS_TS_PROFILE_SHIFT 2 #define BQ24190_REG_VPRS_DEV_REG_MASK (BIT(1) | BIT(0)) #define BQ24190_REG_VPRS_DEV_REG_SHIFT 0 -/* - * The FAULT register is latched by the bq24190 (except for NTC_FAULT) - * so the first read after a fault returns the latched value and subsequent - * reads return the current value. In order to return the fault status - * to the user, have the interrupt handler save the reg's value and retrieve - * it in the appropriate health/status routine. 
- */ -struct bq24190_dev_info { - struct i2c_client *client; - struct device *dev; - struct extcon_dev *edev; - struct power_supply *charger; - struct power_supply *battery; - struct delayed_work input_current_limit_work; - char model_name[I2C_NAME_SIZE]; - bool initialized; - bool irq_event; - bool otg_vbus_enabled; - int charge_type; - u16 sys_min; - u16 iprechg; - u16 iterm; - u32 ichg; - u32 ichg_max; - u32 vreg; - u32 vreg_max; - struct mutex f_reg_lock; - u8 f_reg; - u8 ss_reg; - u8 watchdog; -}; - -static int bq24190_charger_set_charge_type(struct bq24190_dev_info *bdi, - const union power_supply_propval *val); - -static const unsigned int bq24190_usb_extcon_cable[] = { - EXTCON_USB, - EXTCON_NONE, -}; - /* * The tables below provide a 2-way mapping for the value that goes in * the register field and the real-world value that it represents. @@ -211,6 +182,9 @@ static const int bq24190_ccc_ichg_values[] = { 4096000, 4160000, 4224000, 4288000, 4352000, 4416000, 4480000, 4544000 }; +/* ICHG higher than 3008mA is not supported in BQ24296 */ +#define BQ24296_CCC_ICHG_VALUES_LEN 40 + /* REG04[7:2] (VREG) in uV */ static const int bq24190_cvc_vreg_values[] = { 3504000, 3520000, 3536000, 3552000, 3568000, 3584000, 3600000, 3616000, @@ -228,6 +202,68 @@ static const int bq24190_ictrc_treg_values[] = { 600, 800, 1000, 1200 }; +enum bq24190_chip { + BQ24190, + BQ24192, + BQ24192i, + BQ24196, + BQ24296, +}; + +/* + * The FAULT register is latched by the bq24190 (except for NTC_FAULT) + * so the first read after a fault returns the latched value and subsequent + * reads return the current value. In order to return the fault status + * to the user, have the interrupt handler save the reg's value and retrieve + * it in the appropriate health/status routine. 
+ */ +struct bq24190_dev_info { + struct i2c_client *client; + struct device *dev; + struct extcon_dev *edev; + struct power_supply *charger; + struct power_supply *battery; + struct delayed_work input_current_limit_work; + char model_name[I2C_NAME_SIZE]; + bool initialized; + bool irq_event; + bool otg_vbus_enabled; + int charge_type; + u16 sys_min; + u16 iprechg; + u16 iterm; + u32 ichg; + u32 ichg_max; + u32 vreg; + u32 vreg_max; + struct mutex f_reg_lock; + u8 f_reg; + u8 ss_reg; + u8 watchdog; + const struct bq24190_chip_info *info; +}; + +struct bq24190_chip_info { + int ichg_array_size; +#ifdef CONFIG_REGULATOR + const struct regulator_desc *vbus_desc; +#endif + int (*check_chip)(struct bq24190_dev_info *bdi); + int (*set_chg_config)(struct bq24190_dev_info *bdi, const u8 chg_config); + int (*set_otg_vbus)(struct bq24190_dev_info *bdi, bool enable); + u8 ntc_fault_mask; + int (*get_ntc_status)(const u8 value); +}; + +static int bq24190_charger_set_charge_type(struct bq24190_dev_info *bdi, + const union power_supply_propval *val); + +static const unsigned int bq24190_usb_extcon_cable[] = { + EXTCON_USB, + EXTCON_NONE, +}; + + /* * Return the index in 'tbl' of greatest value that is less than or equal to * 'val'. The index range returned is 0 to 'tbl_size' - 1. 
Assumes that @@ -529,6 +565,43 @@ static int bq24190_set_otg_vbus(struct bq24190_dev_info *bdi, bool enable) return ret; } +static int bq24296_set_otg_vbus(struct bq24190_dev_info *bdi, bool enable) +{ + int ret; + + ret = pm_runtime_resume_and_get(bdi->dev); + if (ret < 0) { + dev_warn(bdi->dev, "pm_runtime_get failed: %i\n", ret); + return ret; + } + + bdi->otg_vbus_enabled = enable; + if (enable) { + ret = bq24190_write_mask(bdi, BQ24190_REG_POC, + BQ24296_REG_POC_CHG_CONFIG_MASK, + BQ24296_REG_POC_CHG_CONFIG_SHIFT, + BQ24190_REG_POC_CHG_CONFIG_DISABLE); + + if (ret < 0) + goto out; + + ret = bq24190_write_mask(bdi, BQ24190_REG_POC, + BQ24296_REG_POC_OTG_CONFIG_MASK, + BQ24296_REG_POC_OTG_CONFIG_SHIFT, + BQ24296_REG_POC_OTG_CONFIG_OTG); + } else + ret = bq24190_write_mask(bdi, BQ24190_REG_POC, + BQ24296_REG_POC_OTG_CONFIG_MASK, + BQ24296_REG_POC_OTG_CONFIG_SHIFT, + BQ24296_REG_POC_OTG_CONFIG_DISABLE); + +out: + pm_runtime_mark_last_busy(bdi->dev); + pm_runtime_put_autosuspend(bdi->dev); + + return ret; +} + #ifdef CONFIG_REGULATOR static int bq24190_vbus_enable(struct regulator_dev *dev) { @@ -567,6 +640,43 @@ static int bq24190_vbus_is_enabled(struct regulator_dev *dev) return bdi->otg_vbus_enabled; } +static int bq24296_vbus_enable(struct regulator_dev *dev) +{ + return bq24296_set_otg_vbus(rdev_get_drvdata(dev), true); +} + +static int bq24296_vbus_disable(struct regulator_dev *dev) +{ + return bq24296_set_otg_vbus(rdev_get_drvdata(dev), false); +} + +static int bq24296_vbus_is_enabled(struct regulator_dev *dev) +{ + struct bq24190_dev_info *bdi = rdev_get_drvdata(dev); + int ret; + u8 val; + + ret = pm_runtime_resume_and_get(bdi->dev); + if (ret < 0) { + dev_warn(bdi->dev, "pm_runtime_get failed: %i\n", ret); + return ret; + } + + ret = bq24190_read_mask(bdi, BQ24190_REG_POC, + BQ24296_REG_POC_OTG_CONFIG_MASK, + BQ24296_REG_POC_OTG_CONFIG_SHIFT, &val); + + pm_runtime_mark_last_busy(bdi->dev); + pm_runtime_put_autosuspend(bdi->dev); + + if (ret) + return ret;
+ + bdi->otg_vbus_enabled = (val == BQ24296_REG_POC_OTG_CONFIG_OTG); + + return bdi->otg_vbus_enabled; +} + static const struct regulator_ops bq24190_vbus_ops = { .enable = bq24190_vbus_enable, .disable = bq24190_vbus_disable, @@ -583,6 +693,22 @@ static const struct regulator_desc bq24190_vbus_desc = { .n_voltages = 1, }; +static const struct regulator_ops bq24296_vbus_ops = { + .enable = bq24296_vbus_enable, + .disable = bq24296_vbus_disable, + .is_enabled = bq24296_vbus_is_enabled, +}; + +static const struct regulator_desc bq24296_vbus_desc = { + .name = "usb_otg_vbus", + .of_match = "usb-otg-vbus", + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + .ops = &bq24296_vbus_ops, + .fixed_uV = 5000000, + .n_voltages = 1, +}; + static const struct regulator_init_data bq24190_vbus_init_data = { .constraints = { .valid_ops_mask = REGULATOR_CHANGE_STATUS, @@ -602,7 +728,7 @@ static int bq24190_register_vbus_regulator(struct bq24190_dev_info *bdi) else cfg.init_data = &bq24190_vbus_init_data; cfg.driver_data = bdi; - reg = devm_regulator_register(bdi->dev, &bq24190_vbus_desc, &cfg); + reg = devm_regulator_register(bdi->dev, bdi->info->vbus_desc, &cfg); if (IS_ERR(reg)) { ret = PTR_ERR(reg); dev_err(bdi->dev, "Can't register regulator: %d\n", ret); @@ -678,7 +804,7 @@ static int bq24190_set_config(struct bq24190_dev_info *bdi) BQ24190_REG_CCC_ICHG_MASK, BQ24190_REG_CCC_ICHG_SHIFT, bq24190_ccc_ichg_values, - ARRAY_SIZE(bq24190_ccc_ichg_values), + bdi->info->ichg_array_size, bdi->ichg); if (ret < 0) return ret; @@ -777,6 +903,24 @@ static int bq24190_charger_get_charge_type(struct bq24190_dev_info *bdi, return 0; } +static int bq24190_battery_set_chg_config(struct bq24190_dev_info *bdi, + const u8 chg_config) +{ + return bq24190_write_mask(bdi, BQ24190_REG_POC, + BQ24190_REG_POC_CHG_CONFIG_MASK, + BQ24190_REG_POC_CHG_CONFIG_SHIFT, + chg_config); +} + +static int bq24296_battery_set_chg_config(struct bq24190_dev_info *bdi, + const u8 chg_config) +{ + return 
bq24190_write_mask(bdi, BQ24190_REG_POC, + BQ24296_REG_POC_CHG_CONFIG_MASK, + BQ24296_REG_POC_CHG_CONFIG_SHIFT, + chg_config); +} + static int bq24190_charger_set_charge_type(struct bq24190_dev_info *bdi, const union power_supply_propval *val) { @@ -835,9 +979,50 @@ static int bq24190_charger_set_charge_type(struct bq24190_dev_info *bdi, return ret; } - return bq24190_write_mask(bdi, BQ24190_REG_POC, - BQ24190_REG_POC_CHG_CONFIG_MASK, - BQ24190_REG_POC_CHG_CONFIG_SHIFT, chg_config); + return bdi->info->set_chg_config(bdi, chg_config); +} + +static int bq24190_charger_get_ntc_status(u8 value) +{ + int health; + + switch (value >> BQ24190_REG_F_NTC_FAULT_SHIFT & 0x7) { + case 0x1: /* TS1 Cold */ + case 0x3: /* TS2 Cold */ + case 0x5: /* Both Cold */ + health = POWER_SUPPLY_HEALTH_COLD; + break; + case 0x2: /* TS1 Hot */ + case 0x4: /* TS2 Hot */ + case 0x6: /* Both Hot */ + health = POWER_SUPPLY_HEALTH_OVERHEAT; + break; + default: + health = POWER_SUPPLY_HEALTH_UNKNOWN; + } + + return health; +} + +static int bq24296_charger_get_ntc_status(u8 value) +{ + int health; + + switch (value >> BQ24296_REG_F_NTC_FAULT_SHIFT & 0x3) { + case 0x0: /* Normal */ + health = POWER_SUPPLY_HEALTH_GOOD; + break; + case 0x1: /* Hot */ + health = POWER_SUPPLY_HEALTH_OVERHEAT; + break; + case 0x2: /* Cold */ + health = POWER_SUPPLY_HEALTH_COLD; + break; + default: + health = POWER_SUPPLY_HEALTH_UNKNOWN; + } + + return health; } static int bq24190_charger_get_health(struct bq24190_dev_info *bdi, @@ -850,21 +1035,8 @@ static int bq24190_charger_get_health(struct bq24190_dev_info *bdi, v = bdi->f_reg; mutex_unlock(&bdi->f_reg_lock); - if (v & BQ24190_REG_F_NTC_FAULT_MASK) { - switch (v >> BQ24190_REG_F_NTC_FAULT_SHIFT & 0x7) { - case 0x1: /* TS1 Cold */ - case 0x3: /* TS2 Cold */ - case 0x5: /* Both Cold */ - health = POWER_SUPPLY_HEALTH_COLD; - break; - case 0x2: /* TS1 Hot */ - case 0x4: /* TS2 Hot */ - case 0x6: /* Both Hot */ - health = POWER_SUPPLY_HEALTH_OVERHEAT; - break; - default: 
- health = POWER_SUPPLY_HEALTH_UNKNOWN; - } + if (v & bdi->info->ntc_fault_mask) { + health = bdi->info->get_ntc_status(v); } else if (v & BQ24190_REG_F_BAT_FAULT_MASK) { health = POWER_SUPPLY_HEALTH_OVERVOLTAGE; } else if (v & BQ24190_REG_F_CHRG_FAULT_MASK) { @@ -1015,7 +1187,7 @@ static int bq24190_charger_get_current(struct bq24190_dev_info *bdi, ret = bq24190_get_field_val(bdi, BQ24190_REG_CCC, BQ24190_REG_CCC_ICHG_MASK, BQ24190_REG_CCC_ICHG_SHIFT, bq24190_ccc_ichg_values, - ARRAY_SIZE(bq24190_ccc_ichg_values), &curr); + bdi->info->ichg_array_size, &curr); if (ret < 0) return ret; @@ -1055,7 +1227,7 @@ static int bq24190_charger_set_current(struct bq24190_dev_info *bdi, ret = bq24190_set_field_val(bdi, BQ24190_REG_CCC, BQ24190_REG_CCC_ICHG_MASK, BQ24190_REG_CCC_ICHG_SHIFT, bq24190_ccc_ichg_values, - ARRAY_SIZE(bq24190_ccc_ichg_values), curr); + bdi->info->ichg_array_size, curr); if (ret < 0) return ret; @@ -1395,26 +1567,9 @@ static int bq24190_battery_get_health(struct bq24190_dev_info *bdi, if (v & BQ24190_REG_F_BAT_FAULT_MASK) { health = POWER_SUPPLY_HEALTH_OVERVOLTAGE; } else { - v &= BQ24190_REG_F_NTC_FAULT_MASK; - v >>= BQ24190_REG_F_NTC_FAULT_SHIFT; + v &= bdi->info->ntc_fault_mask; - switch (v) { - case 0x0: /* Normal */ - health = POWER_SUPPLY_HEALTH_GOOD; - break; - case 0x1: /* TS1 Cold */ - case 0x3: /* TS2 Cold */ - case 0x5: /* Both Cold */ - health = POWER_SUPPLY_HEALTH_COLD; - break; - case 0x2: /* TS1 Hot */ - case 0x4: /* TS2 Hot */ - case 0x6: /* Both Hot */ - health = POWER_SUPPLY_HEALTH_OVERHEAT; - break; - default: - health = POWER_SUPPLY_HEALTH_UNKNOWN; - } + health = v ? 
bdi->info->get_ntc_status(v) : POWER_SUPPLY_HEALTH_GOOD; } val->intval = health; @@ -1601,12 +1756,13 @@ static int bq24190_configure_usb_otg(struct bq24190_dev_info *bdi, u8 ss_reg) static void bq24190_check_status(struct bq24190_dev_info *bdi) { const u8 battery_mask_ss = BQ24190_REG_SS_CHRG_STAT_MASK; - const u8 battery_mask_f = BQ24190_REG_F_BAT_FAULT_MASK - | BQ24190_REG_F_NTC_FAULT_MASK; + u8 battery_mask_f = BQ24190_REG_F_BAT_FAULT_MASK; bool alert_charger = false, alert_battery = false; u8 ss_reg = 0, f_reg = 0; int i, ret; + battery_mask_f |= bdi->info->ntc_fault_mask; + ret = bq24190_read(bdi, BQ24190_REG_SS, &ss_reg); if (ret < 0) { dev_err(bdi->dev, "Can't read SS reg: %d\n", ret); @@ -1633,7 +1789,7 @@ static void bq24190_check_status(struct bq24190_dev_info *bdi) !!(f_reg & BQ24190_REG_F_BOOST_FAULT_MASK), !!(f_reg & BQ24190_REG_F_CHRG_FAULT_MASK), !!(f_reg & BQ24190_REG_F_BAT_FAULT_MASK), - !!(f_reg & BQ24190_REG_F_NTC_FAULT_MASK)); + !!(f_reg & bdi->info->ntc_fault_mask)); mutex_lock(&bdi->f_reg_lock); if ((bdi->f_reg & battery_mask_f) != (f_reg & battery_mask_f)) @@ -1696,12 +1852,11 @@ static irqreturn_t bq24190_irq_handler_thread(int irq, void *data) return IRQ_HANDLED; } -static int bq24190_hw_init(struct bq24190_dev_info *bdi) +static int bq24190_check_chip(struct bq24190_dev_info *bdi) { u8 v; int ret; - /* First check that the device really is what its supposed to be */ ret = bq24190_read_mask(bdi, BQ24190_REG_VPRS, BQ24190_REG_VPRS_PN_MASK, BQ24190_REG_VPRS_PN_SHIFT, @@ -1719,6 +1874,40 @@ static int bq24190_hw_init(struct bq24190_dev_info *bdi) return -ENODEV; } + return 0; +} + +static int bq24296_check_chip(struct bq24190_dev_info *bdi) +{ + u8 v; + int ret; + + ret = bq24190_read_mask(bdi, BQ24190_REG_VPRS, + BQ24296_REG_VPRS_PN_MASK, + BQ24296_REG_VPRS_PN_SHIFT, + &v); + if (ret < 0) + return ret; + + switch (v) { + case BQ24296_REG_VPRS_PN_24296: + break; + default: + dev_err(bdi->dev, "Error unknown model: 0x%02x\n", v); + return 
-ENODEV; + } + + return 0; +} + +static int bq24190_hw_init(struct bq24190_dev_info *bdi) +{ + int ret; + + ret = bdi->info->check_chip(bdi); + if (ret < 0) + return ret; + ret = bq24190_register_reset(bdi); if (ret < 0) return ret; @@ -1736,7 +1925,8 @@ static int bq24190_get_config(struct bq24190_dev_info *bdi) struct power_supply_battery_info *info; int v, idx; - idx = ARRAY_SIZE(bq24190_ccc_ichg_values) - 1; + idx = bdi->info->ichg_array_size - 1; + bdi->ichg_max = bq24190_ccc_ichg_values[idx]; idx = ARRAY_SIZE(bq24190_cvc_vreg_values) - 1; @@ -1781,6 +1971,64 @@ static int bq24190_get_config(struct bq24190_dev_info *bdi) return 0; } +static const struct bq24190_chip_info bq24190_chip_info_tbl[] = { + [BQ24190] = { + .ichg_array_size = ARRAY_SIZE(bq24190_ccc_ichg_values), +#ifdef CONFIG_REGULATOR + .vbus_desc = &bq24190_vbus_desc, +#endif + .check_chip = bq24190_check_chip, + .set_chg_config = bq24190_battery_set_chg_config, + .ntc_fault_mask = BQ24190_REG_F_NTC_FAULT_MASK, + .get_ntc_status = bq24190_charger_get_ntc_status, + .set_otg_vbus = bq24190_set_otg_vbus, + }, + [BQ24192] = { + .ichg_array_size = ARRAY_SIZE(bq24190_ccc_ichg_values), +#ifdef CONFIG_REGULATOR + .vbus_desc = &bq24190_vbus_desc, +#endif + .check_chip = bq24190_check_chip, + .set_chg_config = bq24190_battery_set_chg_config, + .ntc_fault_mask = BQ24190_REG_F_NTC_FAULT_MASK, + .get_ntc_status = bq24190_charger_get_ntc_status, + .set_otg_vbus = bq24190_set_otg_vbus, + }, + [BQ24192i] = { + .ichg_array_size = ARRAY_SIZE(bq24190_ccc_ichg_values), +#ifdef CONFIG_REGULATOR + .vbus_desc = &bq24190_vbus_desc, +#endif + .check_chip = bq24190_check_chip, + .set_chg_config = bq24190_battery_set_chg_config, + .ntc_fault_mask = BQ24190_REG_F_NTC_FAULT_MASK, + .get_ntc_status = bq24190_charger_get_ntc_status, + .set_otg_vbus = bq24190_set_otg_vbus, + }, + [BQ24196] = { + .ichg_array_size = ARRAY_SIZE(bq24190_ccc_ichg_values), +#ifdef CONFIG_REGULATOR + .vbus_desc = &bq24190_vbus_desc, +#endif + 
.check_chip = bq24190_check_chip, + .set_chg_config = bq24190_battery_set_chg_config, + .ntc_fault_mask = BQ24190_REG_F_NTC_FAULT_MASK, + .get_ntc_status = bq24190_charger_get_ntc_status, + .set_otg_vbus = bq24190_set_otg_vbus, + }, + [BQ24296] = { + .ichg_array_size = BQ24296_CCC_ICHG_VALUES_LEN, +#ifdef CONFIG_REGULATOR + .vbus_desc = &bq24296_vbus_desc, +#endif + .check_chip = bq24296_check_chip, + .set_chg_config = bq24296_battery_set_chg_config, + .ntc_fault_mask = BQ24296_REG_F_NTC_FAULT_MASK, + .get_ntc_status = bq24296_charger_get_ntc_status, + .set_otg_vbus = bq24296_set_otg_vbus, + }, +}; + static int bq24190_probe(struct i2c_client *client) { const struct i2c_device_id *id = i2c_client_get_device_id(client); @@ -1804,6 +2052,7 @@ static int bq24190_probe(struct i2c_client *client) bdi->client = client; bdi->dev = dev; strscpy(bdi->model_name, id->name, sizeof(bdi->model_name)); + bdi->info = i2c_get_match_data(client); mutex_init(&bdi->f_reg_lock); bdi->charge_type = POWER_SUPPLY_CHARGE_TYPE_FAST; bdi->f_reg = 0; @@ -1940,7 +2189,7 @@ static void bq24190_shutdown(struct i2c_client *client) struct bq24190_dev_info *bdi = i2c_get_clientdata(client); /* Turn off 5V boost regulator on shutdown */ - bq24190_set_otg_vbus(bdi, false); + bdi->info->set_otg_vbus(bdi, false); } static __maybe_unused int bq24190_runtime_suspend(struct device *dev) @@ -2029,19 +2278,21 @@ static const struct dev_pm_ops bq24190_pm_ops = { }; static const struct i2c_device_id bq24190_i2c_ids[] = { - { "bq24190" }, - { "bq24192" }, - { "bq24192i" }, - { "bq24196" }, + { "bq24190", (kernel_ulong_t)&bq24190_chip_info_tbl[BQ24190] }, + { "bq24192", (kernel_ulong_t)&bq24190_chip_info_tbl[BQ24192] }, + { "bq24192i", (kernel_ulong_t)&bq24190_chip_info_tbl[BQ24192i] }, + { "bq24196", (kernel_ulong_t)&bq24190_chip_info_tbl[BQ24196] }, + { "bq24296", (kernel_ulong_t)&bq24190_chip_info_tbl[BQ24296] }, { }, }; MODULE_DEVICE_TABLE(i2c, bq24190_i2c_ids); static const struct of_device_id 
bq24190_of_match[] = { - { .compatible = "ti,bq24190", }, - { .compatible = "ti,bq24192", }, - { .compatible = "ti,bq24192i", }, - { .compatible = "ti,bq24196", }, + { .compatible = "ti,bq24190", .data = &bq24190_chip_info_tbl[BQ24190] }, + { .compatible = "ti,bq24192", .data = &bq24190_chip_info_tbl[BQ24192] }, + { .compatible = "ti,bq24192i", .data = &bq24190_chip_info_tbl[BQ24192i] }, + { .compatible = "ti,bq24196", .data = &bq24190_chip_info_tbl[BQ24196] }, + { .compatible = "ti,bq24296", .data = &bq24190_chip_info_tbl[BQ24296] }, { }, }; MODULE_DEVICE_TABLE(of, bq24190_of_match); diff --git a/drivers/power/supply/bq256xx_charger.c b/drivers/power/supply/bq256xx_charger.c index 789a31bd70c39f2954527bfb565dc53fda0d6c21..1a935bc885108e7769b6fe2c5aa344a7530ed839 100644 --- a/drivers/power/supply/bq256xx_charger.c +++ b/drivers/power/supply/bq256xx_charger.c @@ -1574,13 +1574,16 @@ static int bq256xx_hw_init(struct bq256xx_device *bq) wd_reg_val = i; break; } - if (bq->watchdog_timer > bq256xx_watchdog_time[i] && + if (i + 1 < BQ256XX_NUM_WD_VAL && + bq->watchdog_timer > bq256xx_watchdog_time[i] && bq->watchdog_timer < bq256xx_watchdog_time[i + 1]) wd_reg_val = i; } ret = regmap_update_bits(bq->regmap, BQ256XX_CHARGER_CONTROL_1, BQ256XX_WATCHDOG_MASK, wd_reg_val << BQ256XX_WDT_BIT_SHIFT); + if (ret) + return ret; ret = power_supply_get_battery_info(bq->charger, &bat_info); if (ret == -ENOMEM) diff --git a/drivers/power/supply/bq27xxx_battery.c b/drivers/power/supply/bq27xxx_battery.c index 4296600e8912a3988c45286f58420ffabb547345..1c4a9d1377442ad98f4e3fcb3b1215bf3e49b20b 100644 --- a/drivers/power/supply/bq27xxx_battery.c +++ b/drivers/power/supply/bq27xxx_battery.c @@ -2162,6 +2162,28 @@ void bq27xxx_battery_teardown(struct bq27xxx_device_info *di) } EXPORT_SYMBOL_GPL(bq27xxx_battery_teardown); +#ifdef CONFIG_PM_SLEEP +static int bq27xxx_battery_suspend(struct device *dev) +{ + struct bq27xxx_device_info *di = dev_get_drvdata(dev); + + 
cancel_delayed_work(&di->work); + return 0; +} + +static int bq27xxx_battery_resume(struct device *dev) +{ + struct bq27xxx_device_info *di = dev_get_drvdata(dev); + + schedule_delayed_work(&di->work, 0); + return 0; +} +#endif /* CONFIG_PM_SLEEP */ + +SIMPLE_DEV_PM_OPS(bq27xxx_battery_battery_pm_ops, + bq27xxx_battery_suspend, bq27xxx_battery_resume); +EXPORT_SYMBOL_GPL(bq27xxx_battery_battery_pm_ops); + MODULE_AUTHOR("Rodolfo Giometti "); MODULE_DESCRIPTION("BQ27xxx battery monitor driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/power/supply/bq27xxx_battery_i2c.c b/drivers/power/supply/bq27xxx_battery_i2c.c index 9b5475590518fb23153acdc2a9ceea403bc73fef..3a1798b0c1a79f3ed3a3fd0be4d84f6df390b3b4 100644 --- a/drivers/power/supply/bq27xxx_battery_i2c.c +++ b/drivers/power/supply/bq27xxx_battery_i2c.c @@ -295,6 +295,7 @@ static struct i2c_driver bq27xxx_battery_i2c_driver = { .driver = { .name = "bq27xxx-battery", .of_match_table = of_match_ptr(bq27xxx_battery_i2c_of_match_table), + .pm = &bq27xxx_battery_battery_pm_ops, }, .probe = bq27xxx_battery_i2c_probe, .remove = bq27xxx_battery_i2c_remove, diff --git a/drivers/power/supply/cw2015_battery.c b/drivers/power/supply/cw2015_battery.c index bb29e9ebd24a8eb2b96f5a1513f02419aa14d043..99f3ccdc30a6a77dc06ba6c7ba54ed47e7358b9c 100644 --- a/drivers/power/supply/cw2015_battery.c +++ b/drivers/power/supply/cw2015_battery.c @@ -491,7 +491,7 @@ static int cw_battery_get_property(struct power_supply *psy, case POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW: if (cw_battery_valid_time_to_empty(cw_bat)) - val->intval = cw_bat->time_to_empty; + val->intval = cw_bat->time_to_empty * 60; else val->intval = 0; break; diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c index 813293f09907808afb088e8973e6265065a70512..97608f04f9078e2f4375245e997b051d880cc689 100644 --- a/drivers/power/supply/power_supply_core.c +++ b/drivers/power/supply/power_supply_core.c @@ -1185,6 +1185,7 @@ 
EXPORT_SYMBOL_GPL(power_supply_powers); static void power_supply_dev_release(struct device *dev) { struct power_supply *psy = to_power_supply(dev); + dev_dbg(dev, "%s\n", __func__); kfree(psy); } @@ -1571,6 +1572,6 @@ subsys_initcall(power_supply_class_init); module_exit(power_supply_class_exit); MODULE_DESCRIPTION("Universal power supply monitor class"); -MODULE_AUTHOR("Ian Molton , " - "Szabolcs Gyurko, " - "Anton Vorontsov "); +MODULE_AUTHOR("Ian Molton "); +MODULE_AUTHOR("Szabolcs Gyurko"); +MODULE_AUTHOR("Anton Vorontsov "); diff --git a/drivers/power/supply/qcom_battmgr.c b/drivers/power/supply/qcom_battmgr.c index ec163d1bcd189192abcecbcb4e29e0e4251b2e38..a12e2a66d516f9de6e4b7ccc3f8048861322624a 100644 --- a/drivers/power/supply/qcom_battmgr.c +++ b/drivers/power/supply/qcom_battmgr.c @@ -282,6 +282,7 @@ struct qcom_battmgr_wireless { struct qcom_battmgr { struct device *dev; + struct auxiliary_device *adev; struct pmic_glink_client *client; enum qcom_battmgr_variant variant; @@ -1293,11 +1294,69 @@ static void qcom_battmgr_enable_worker(struct work_struct *work) dev_err(battmgr->dev, "failed to request power notifications\n"); } +static char *qcom_battmgr_battery[] = { "battery" }; + +static void qcom_battmgr_register_psy(struct qcom_battmgr *battmgr) +{ + struct power_supply_config psy_cfg_supply = {}; + struct auxiliary_device *adev = battmgr->adev; + struct power_supply_config psy_cfg = {}; + struct device *dev = &adev->dev; + + psy_cfg.drv_data = battmgr; + psy_cfg.of_node = adev->dev.of_node; + + psy_cfg_supply.drv_data = battmgr; + psy_cfg_supply.of_node = adev->dev.of_node; + psy_cfg_supply.supplied_to = qcom_battmgr_battery; + psy_cfg_supply.num_supplicants = 1; + + if (battmgr->variant == QCOM_BATTMGR_SC8280XP) { + battmgr->bat_psy = devm_power_supply_register(dev, &sc8280xp_bat_psy_desc, &psy_cfg); + if (IS_ERR(battmgr->bat_psy)) + dev_err(dev, "failed to register battery power supply (%ld)\n", + PTR_ERR(battmgr->bat_psy)); + + battmgr->ac_psy = 
devm_power_supply_register(dev, &sc8280xp_ac_psy_desc, &psy_cfg_supply); + if (IS_ERR(battmgr->ac_psy)) + dev_err(dev, "failed to register AC power supply (%ld)\n", + PTR_ERR(battmgr->ac_psy)); + + battmgr->usb_psy = devm_power_supply_register(dev, &sc8280xp_usb_psy_desc, &psy_cfg_supply); + if (IS_ERR(battmgr->usb_psy)) + dev_err(dev, "failed to register USB power supply (%ld)\n", + PTR_ERR(battmgr->usb_psy)); + + battmgr->wls_psy = devm_power_supply_register(dev, &sc8280xp_wls_psy_desc, &psy_cfg_supply); + if (IS_ERR(battmgr->wls_psy)) + dev_err(dev, "failed to register wireless charing power supply (%ld)\n", + PTR_ERR(battmgr->wls_psy)); + } else { + battmgr->bat_psy = devm_power_supply_register(dev, &sm8350_bat_psy_desc, &psy_cfg); + if (IS_ERR(battmgr->bat_psy)) + dev_err(dev, "failed to register battery power supply (%ld)\n", + PTR_ERR(battmgr->bat_psy)); + + battmgr->usb_psy = devm_power_supply_register(dev, &sm8350_usb_psy_desc, &psy_cfg_supply); + if (IS_ERR(battmgr->usb_psy)) + dev_err(dev, "failed to register USB power supply (%ld)\n", + PTR_ERR(battmgr->usb_psy)); + + battmgr->wls_psy = devm_power_supply_register(dev, &sm8350_wls_psy_desc, &psy_cfg_supply); + if (IS_ERR(battmgr->wls_psy)) + dev_err(dev, "failed to register wireless charing power supply (%ld)\n", + PTR_ERR(battmgr->wls_psy)); + } +} + static void qcom_battmgr_pdr_notify(void *priv, int state) { struct qcom_battmgr *battmgr = priv; if (state == SERVREG_SERVICE_STATE_UP) { + if (!battmgr->bat_psy) + qcom_battmgr_register_psy(battmgr); + battmgr->service_up = true; schedule_work(&battmgr->enable_work); } else { @@ -1312,13 +1371,9 @@ static const struct of_device_id qcom_battmgr_of_variants[] = { {} }; -static char *qcom_battmgr_battery[] = { "battery" }; - static int qcom_battmgr_probe(struct auxiliary_device *adev, const struct auxiliary_device_id *id) { - struct power_supply_config psy_cfg_supply = {}; - struct power_supply_config psy_cfg = {}; const struct of_device_id *match; struct 
qcom_battmgr *battmgr; struct device *dev = &adev->dev; @@ -1328,14 +1383,7 @@ static int qcom_battmgr_probe(struct auxiliary_device *adev, return -ENOMEM; battmgr->dev = dev; - - psy_cfg.drv_data = battmgr; - psy_cfg.of_node = adev->dev.of_node; - - psy_cfg_supply.drv_data = battmgr; - psy_cfg_supply.of_node = adev->dev.of_node; - psy_cfg_supply.supplied_to = qcom_battmgr_battery; - psy_cfg_supply.num_supplicants = 1; + battmgr->adev = adev; INIT_WORK(&battmgr->enable_work, qcom_battmgr_enable_worker); mutex_init(&battmgr->lock); @@ -1347,43 +1395,6 @@ static int qcom_battmgr_probe(struct auxiliary_device *adev, else battmgr->variant = QCOM_BATTMGR_SM8350; - if (battmgr->variant == QCOM_BATTMGR_SC8280XP) { - battmgr->bat_psy = devm_power_supply_register(dev, &sc8280xp_bat_psy_desc, &psy_cfg); - if (IS_ERR(battmgr->bat_psy)) - return dev_err_probe(dev, PTR_ERR(battmgr->bat_psy), - "failed to register battery power supply\n"); - - battmgr->ac_psy = devm_power_supply_register(dev, &sc8280xp_ac_psy_desc, &psy_cfg_supply); - if (IS_ERR(battmgr->ac_psy)) - return dev_err_probe(dev, PTR_ERR(battmgr->ac_psy), - "failed to register AC power supply\n"); - - battmgr->usb_psy = devm_power_supply_register(dev, &sc8280xp_usb_psy_desc, &psy_cfg_supply); - if (IS_ERR(battmgr->usb_psy)) - return dev_err_probe(dev, PTR_ERR(battmgr->usb_psy), - "failed to register USB power supply\n"); - - battmgr->wls_psy = devm_power_supply_register(dev, &sc8280xp_wls_psy_desc, &psy_cfg_supply); - if (IS_ERR(battmgr->wls_psy)) - return dev_err_probe(dev, PTR_ERR(battmgr->wls_psy), - "failed to register wireless charing power supply\n"); - } else { - battmgr->bat_psy = devm_power_supply_register(dev, &sm8350_bat_psy_desc, &psy_cfg); - if (IS_ERR(battmgr->bat_psy)) - return dev_err_probe(dev, PTR_ERR(battmgr->bat_psy), - "failed to register battery power supply\n"); - - battmgr->usb_psy = devm_power_supply_register(dev, &sm8350_usb_psy_desc, &psy_cfg_supply); - if (IS_ERR(battmgr->usb_psy)) - return 
dev_err_probe(dev, PTR_ERR(battmgr->usb_psy), - "failed to register USB power supply\n"); - - battmgr->wls_psy = devm_power_supply_register(dev, &sm8350_wls_psy_desc, &psy_cfg_supply); - if (IS_ERR(battmgr->wls_psy)) - return dev_err_probe(dev, PTR_ERR(battmgr->wls_psy), - "failed to register wireless charing power supply\n"); - } - battmgr->client = devm_pmic_glink_register_client(dev, PMIC_GLINK_OWNER_BATTMGR, qcom_battmgr_callback, diff --git a/drivers/power/supply/qcom_pmi8998_charger.c b/drivers/power/supply/qcom_pmi8998_charger.c index 8acf63ee6897f15d4b231240162e658fb9af76c1..9bb7774060138ed2149c88c79bc4ec96c6a256f9 100644 --- a/drivers/power/supply/qcom_pmi8998_charger.c +++ b/drivers/power/supply/qcom_pmi8998_charger.c @@ -972,10 +972,14 @@ static int smb2_probe(struct platform_device *pdev) supply_config.of_node = pdev->dev.of_node; desc = devm_kzalloc(chip->dev, sizeof(smb2_psy_desc), GFP_KERNEL); + if (!desc) + return -ENOMEM; memcpy(desc, &smb2_psy_desc, sizeof(smb2_psy_desc)); desc->name = devm_kasprintf(chip->dev, GFP_KERNEL, "%s-charger", (const char *)device_get_match_data(chip->dev)); + if (!desc->name) + return -ENOMEM; chip->chg_psy = devm_power_supply_register(chip->dev, desc, &supply_config); diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index 9a4c720c88aadd8ad2667cde7e787f146191739c..f2728ee787d7a5167e8a9f25e1c8550803c60117 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -152,7 +152,7 @@ of_pwm_single_xlate(struct pwm_chip *chip, const struct of_phandle_args *args) pwm->args.period = args->args[0]; pwm->args.polarity = PWM_POLARITY_NORMAL; - if (args->args_count == 2 && args->args[2] & PWM_POLARITY_INVERTED) + if (args->args_count == 2 && args->args[1] & PWM_POLARITY_INVERTED) pwm->args.polarity = PWM_POLARITY_INVERSED; return pwm; diff --git a/drivers/pwm/pwm-bcm2835.c b/drivers/pwm/pwm-bcm2835.c index 307c0bd5f88557f596f49d4e590fb9334d95ee62..283cf27f25bae01fd6b2702781e04ce77188c924 100644 --- a/drivers/pwm/pwm-bcm2835.c 
+++ b/drivers/pwm/pwm-bcm2835.c @@ -160,10 +160,8 @@ static int bcm2835_pwm_probe(struct platform_device *pdev) ret = devm_add_action_or_reset(&pdev->dev, devm_clk_rate_exclusive_put, pc->clk); - if (ret) { - clk_rate_exclusive_put(pc->clk); + if (ret) return ret; - } pc->rate = clk_get_rate(pc->clk); if (!pc->rate) diff --git a/drivers/pwm/pwm-jz4740.c b/drivers/pwm/pwm-jz4740.c index 80dcff237a15540c0c132ccd8b46546e063726c6..3933418e551b412b22edbb4b099add56cf6f601b 100644 --- a/drivers/pwm/pwm-jz4740.c +++ b/drivers/pwm/pwm-jz4740.c @@ -61,9 +61,10 @@ static int jz4740_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm) snprintf(name, sizeof(name), "timer%u", pwm->hwpwm); clk = clk_get(chip->dev, name); - if (IS_ERR(clk)) - return dev_err_probe(chip->dev, PTR_ERR(clk), - "Failed to get clock\n"); + if (IS_ERR(clk)) { + dev_err(chip->dev, "error %pe: Failed to get clock\n", clk); + return PTR_ERR(clk); + } err = clk_prepare_enable(clk); if (err < 0) { diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 3814e0845e7729d9d584680e16abb4ec21f0dda7..e37a4341f442d8ca2fcd80e82bd2adf6c2ca9ea5 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -373,6 +373,19 @@ config RTC_DRV_MAX8997 This driver can also be built as a module. If so, the module will be called rtc-max8997. +config RTC_DRV_MAX31335 + tristate "Analog Devices MAX31335" + depends on I2C + depends on COMMON_CLK + depends on HWMON || HWMON=n + select REGMAP_I2C + help + If you say yes here you get support for the Analog Devices + MAX31335. + + This driver can also be built as a module. If so, the module + will be called rtc-max31335. + config RTC_DRV_MAX77686 tristate "Maxim MAX77686" depends on MFD_MAX77686 || MFD_MAX77620 || MFD_MAX77714 || COMPILE_TEST @@ -578,6 +591,18 @@ config RTC_DRV_TPS6586X along with alarm. This driver supports the RTC driver for the TPS6586X RTC module. 
+config RTC_DRV_TPS6594 + tristate "TI TPS6594 RTC driver" + depends on MFD_TPS6594 + default MFD_TPS6594 + help + TI Power Management IC TPS6594 supports RTC functionality + along with alarm. This driver supports the RTC driver for + the TPS6594 RTC module. + + This driver can also be built as a module. If so, the module + will be called rtc-tps6594. + config RTC_DRV_TPS65910 tristate "TI TPS65910 RTC driver" depends on MFD_TPS65910 @@ -1705,6 +1730,7 @@ config RTC_DRV_LPC24XX tristate "NXP RTC for LPC178x/18xx/408x/43xx" depends on ARCH_LPC18XX || COMPILE_TEST depends on OF && HAS_IOMEM + depends on COMMON_CLK help This enables support for the NXP RTC found which can be found on NXP LPC178x/18xx/408x/43xx devices. @@ -1930,6 +1956,17 @@ config RTC_DRV_TI_K3 This driver can also be built as a module, if so, the module will be called "rtc-ti-k3". +config RTC_DRV_MA35D1 + tristate "Nuvoton MA35D1 RTC" + depends on ARCH_MA35 || COMPILE_TEST + select REGMAP_MMIO + help + If you say yes here you get support for the Nuvoton MA35D1 + On-Chip Real Time Clock. + + This driver can also be built as a module, if so, the module + will be called "rtc-ma35d1". 
+ comment "HID Sensor RTC drivers" config RTC_DRV_HID_SENSOR_TIME diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index 7b03c3abfd786eb03356566fc839aff542da7442..6efff381c484d5d386180de033c07569621c667f 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile @@ -88,6 +88,8 @@ obj-$(CONFIG_RTC_DRV_M41T94) += rtc-m41t94.o obj-$(CONFIG_RTC_DRV_M48T35) += rtc-m48t35.o obj-$(CONFIG_RTC_DRV_M48T59) += rtc-m48t59.o obj-$(CONFIG_RTC_DRV_M48T86) += rtc-m48t86.o +obj-$(CONFIG_RTC_DRV_MA35D1) += rtc-ma35d1.o +obj-$(CONFIG_RTC_DRV_MAX31335) += rtc-max31335.o obj-$(CONFIG_RTC_DRV_MAX6900) += rtc-max6900.o obj-$(CONFIG_RTC_DRV_MAX6902) += rtc-max6902.o obj-$(CONFIG_RTC_DRV_MAX6916) += rtc-max6916.o @@ -176,6 +178,7 @@ obj-$(CONFIG_RTC_DRV_TEGRA) += rtc-tegra.o obj-$(CONFIG_RTC_DRV_TEST) += rtc-test.o obj-$(CONFIG_RTC_DRV_TI_K3) += rtc-ti-k3.o obj-$(CONFIG_RTC_DRV_TPS6586X) += rtc-tps6586x.o +obj-$(CONFIG_RTC_DRV_TPS6594) += rtc-tps6594.o obj-$(CONFIG_RTC_DRV_TPS65910) += rtc-tps65910.o obj-$(CONFIG_RTC_DRV_TWL4030) += rtc-twl.o obj-$(CONFIG_RTC_DRV_VT8500) += rtc-vt8500.o diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c index edfd942f8c54942162808d034823f5fd86424b59..921ee182797439b1496239c93b5d9e8ed76a0b45 100644 --- a/drivers/rtc/class.c +++ b/drivers/rtc/class.c @@ -256,7 +256,7 @@ static int rtc_device_get_id(struct device *dev) of_id = of_alias_get_id(dev->parent->of_node, "rtc"); if (of_id >= 0) { - id = ida_simple_get(&rtc_ida, of_id, of_id + 1, GFP_KERNEL); + id = ida_alloc_range(&rtc_ida, of_id, of_id, GFP_KERNEL); if (id < 0) dev_warn(dev, "/aliases ID %d not available\n", of_id); } diff --git a/drivers/rtc/rtc-ac100.c b/drivers/rtc/rtc-ac100.c index eaf2c9ab96619c1baad80711112f30ce9a94c22d..fa642bba3cee008c9d4fe4abe2cc37aebe2eb41c 100644 --- a/drivers/rtc/rtc-ac100.c +++ b/drivers/rtc/rtc-ac100.c @@ -99,7 +99,7 @@ struct ac100_rtc_dev { struct clk_hw_onecell_data *clk_data; }; -/** +/* * Clock controls for 3 clock output pins */ @@ -378,7 +378,7 @@ 
static void ac100_rtc_unregister_clks(struct ac100_rtc_dev *chip) clk_unregister_fixed_rate(chip->rtc_32k_clk->clk); } -/** +/* * RTC related bits */ static int ac100_rtc_get_time(struct device *dev, struct rtc_time *rtc_tm) diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index 228fb2d11c7091e00d9bb26b2254db2ed923c097..7d99cd2c37a0ba87c06beb2c7dcbe0f560d26bb9 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -231,7 +231,7 @@ static int cmos_read_time(struct device *dev, struct rtc_time *t) if (!pm_trace_rtc_valid()) return -EIO; - ret = mc146818_get_time(t); + ret = mc146818_get_time(t, 1000); if (ret < 0) { dev_err_ratelimited(dev, "unable to read current time\n"); return ret; @@ -292,7 +292,7 @@ static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t) /* This not only a rtc_op, but also called directly */ if (!is_valid_irq(cmos->irq)) - return -EIO; + return -ETIMEDOUT; /* Basic alarms only support hour, minute, and seconds fields. * Some also support day and month, for alarms up to a year in @@ -307,7 +307,7 @@ static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t) * * Use the mc146818_avoid_UIP() function to avoid this. */ - if (!mc146818_avoid_UIP(cmos_read_alarm_callback, &p)) + if (!mc146818_avoid_UIP(cmos_read_alarm_callback, 10, &p)) return -EIO; if (!(p.rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { @@ -556,8 +556,8 @@ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t) * * Use mc146818_avoid_UIP() to avoid this. 
*/ - if (!mc146818_avoid_UIP(cmos_set_alarm_callback, &p)) - return -EIO; + if (!mc146818_avoid_UIP(cmos_set_alarm_callback, 10, &p)) + return -ETIMEDOUT; cmos->alarm_expires = rtc_tm_to_time64(&t->time); @@ -818,18 +818,24 @@ static void rtc_wake_off(struct device *dev) } #ifdef CONFIG_X86 -/* Enable use_acpi_alarm mode for Intel platforms no earlier than 2015 */ static void use_acpi_alarm_quirks(void) { - if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + if (dmi_get_bios_year() < 2015) + return; + break; + case X86_VENDOR_AMD: + case X86_VENDOR_HYGON: + if (dmi_get_bios_year() < 2021) + return; + break; + default: return; - + } if (!is_hpet_enabled()) return; - if (dmi_get_bios_year() < 2015) - return; - use_acpi_alarm = true; } #else diff --git a/drivers/rtc/rtc-da9063.c b/drivers/rtc/rtc-da9063.c index 2f5d60622564a0d83ec3be91bf89710f34e2b1b7..859397541f2983fb32163b780245554e0e84084a 100644 --- a/drivers/rtc/rtc-da9063.c +++ b/drivers/rtc/rtc-da9063.c @@ -377,7 +377,6 @@ static int da9063_rtc_probe(struct platform_device *pdev) { struct da9063_compatible_rtc *rtc; const struct da9063_compatible_rtc_regmap *config; - const struct of_device_id *match; int irq_alarm; u8 data[RTC_DATA_LEN]; int ret; @@ -385,14 +384,11 @@ static int da9063_rtc_probe(struct platform_device *pdev) if (!pdev->dev.of_node) return -ENXIO; - match = of_match_node(da9063_compatible_reg_id_table, - pdev->dev.of_node); - rtc = devm_kzalloc(&pdev->dev, sizeof(*rtc), GFP_KERNEL); if (!rtc) return -ENOMEM; - rtc->config = match->data; + rtc->config = device_get_match_data(&pdev->dev); if (of_device_is_compatible(pdev->dev.of_node, "dlg,da9063-rtc")) { struct da9063 *chip = dev_get_drvdata(pdev->dev.parent); @@ -411,57 +407,49 @@ static int da9063_rtc_probe(struct platform_device *pdev) config->rtc_enable_reg, config->rtc_enable_mask, config->rtc_enable_mask); - if (ret < 0) { - dev_err(&pdev->dev, "Failed to enable RTC\n"); - 
return ret; - } + if (ret < 0) + return dev_err_probe(&pdev->dev, ret, "Failed to enable RTC\n"); ret = regmap_update_bits(rtc->regmap, config->rtc_enable_32k_crystal_reg, config->rtc_crystal_mask, config->rtc_crystal_mask); - if (ret < 0) { - dev_err(&pdev->dev, "Failed to run 32kHz oscillator\n"); - return ret; - } + if (ret < 0) + return dev_err_probe(&pdev->dev, ret, + "Failed to run 32kHz oscillator\n"); ret = regmap_update_bits(rtc->regmap, config->rtc_alarm_secs_reg, config->rtc_alarm_status_mask, 0); - if (ret < 0) { - dev_err(&pdev->dev, "Failed to access RTC alarm register\n"); - return ret; - } + if (ret < 0) + return dev_err_probe(&pdev->dev, ret, + "Failed to access RTC alarm register\n"); ret = regmap_update_bits(rtc->regmap, config->rtc_alarm_secs_reg, DA9063_ALARM_STATUS_ALARM, DA9063_ALARM_STATUS_ALARM); - if (ret < 0) { - dev_err(&pdev->dev, "Failed to access RTC alarm register\n"); - return ret; - } + if (ret < 0) + return dev_err_probe(&pdev->dev, ret, + "Failed to access RTC alarm register\n"); ret = regmap_update_bits(rtc->regmap, config->rtc_alarm_year_reg, config->rtc_tick_on_mask, 0); - if (ret < 0) { - dev_err(&pdev->dev, "Failed to disable TICKs\n"); - return ret; - } + if (ret < 0) + return dev_err_probe(&pdev->dev, ret, + "Failed to disable TICKs\n"); data[RTC_SEC] = 0; ret = regmap_bulk_read(rtc->regmap, config->rtc_alarm_secs_reg, &data[config->rtc_data_start], config->rtc_alarm_len); - if (ret < 0) { - dev_err(&pdev->dev, "Failed to read initial alarm data: %d\n", - ret); - return ret; - } + if (ret < 0) + return dev_err_probe(&pdev->dev, ret, + "Failed to read initial alarm data\n"); platform_set_drvdata(pdev, rtc); @@ -485,25 +473,29 @@ static int da9063_rtc_probe(struct platform_device *pdev) clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, rtc->rtc_dev->features); } - irq_alarm = platform_get_irq_byname(pdev, "ALARM"); - if (irq_alarm < 0) + irq_alarm = platform_get_irq_byname_optional(pdev, "ALARM"); + if (irq_alarm >= 0) { + ret = 
devm_request_threaded_irq(&pdev->dev, irq_alarm, NULL, + da9063_alarm_event, + IRQF_TRIGGER_LOW | IRQF_ONESHOT, + "ALARM", rtc); + if (ret) + dev_err(&pdev->dev, + "Failed to request ALARM IRQ %d: %d\n", + irq_alarm, ret); + + ret = dev_pm_set_wake_irq(&pdev->dev, irq_alarm); + if (ret) + dev_warn(&pdev->dev, + "Failed to set IRQ %d as a wake IRQ: %d\n", + irq_alarm, ret); + + device_init_wakeup(&pdev->dev, true); + } else if (irq_alarm != -ENXIO) { return irq_alarm; - - ret = devm_request_threaded_irq(&pdev->dev, irq_alarm, NULL, - da9063_alarm_event, - IRQF_TRIGGER_LOW | IRQF_ONESHOT, - "ALARM", rtc); - if (ret) - dev_err(&pdev->dev, "Failed to request ALARM IRQ %d: %d\n", - irq_alarm, ret); - - ret = dev_pm_set_wake_irq(&pdev->dev, irq_alarm); - if (ret) - dev_warn(&pdev->dev, - "Failed to set IRQ %d as a wake IRQ: %d\n", - irq_alarm, ret); - - device_init_wakeup(&pdev->dev, true); + } else { + clear_bit(RTC_FEATURE_ALARM, rtc->rtc_dev->features); + } return devm_rtc_register_device(rtc->rtc_dev); } diff --git a/drivers/rtc/rtc-ds3232.c b/drivers/rtc/rtc-ds3232.c index 89d7b085f7219954b1a154ef41245e76251e9336..1485a6ae51e613092c4d08fb44f3d6cc7d385816 100644 --- a/drivers/rtc/rtc-ds3232.c +++ b/drivers/rtc/rtc-ds3232.c @@ -536,6 +536,8 @@ static int ds3232_probe(struct device *dev, struct regmap *regmap, int irq, return 0; } +#if IS_ENABLED(CONFIG_I2C) + #ifdef CONFIG_PM_SLEEP static int ds3232_suspend(struct device *dev) { @@ -564,8 +566,6 @@ static const struct dev_pm_ops ds3232_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(ds3232_suspend, ds3232_resume) }; -#if IS_ENABLED(CONFIG_I2C) - static int ds3232_i2c_probe(struct i2c_client *client) { struct regmap *regmap; diff --git a/drivers/rtc/rtc-ma35d1.c b/drivers/rtc/rtc-ma35d1.c new file mode 100644 index 0000000000000000000000000000000000000000..cfcfc28060f6210d53b0d5f722ec4f98134303fd --- /dev/null +++ b/drivers/rtc/rtc-ma35d1.c @@ -0,0 +1,304 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * RTC driver for Nuvoton MA35D1 
+ * + * Copyright (C) 2023 Nuvoton Technology Corp. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* MA35D1 RTC Control Registers */ +#define MA35_REG_RTC_INIT 0x00 +#define MA35_REG_RTC_SINFASTS 0x04 +#define MA35_REG_RTC_FREQADJ 0x08 +#define MA35_REG_RTC_TIME 0x0c +#define MA35_REG_RTC_CAL 0x10 +#define MA35_REG_RTC_CLKFMT 0x14 +#define MA35_REG_RTC_WEEKDAY 0x18 +#define MA35_REG_RTC_TALM 0x1c +#define MA35_REG_RTC_CALM 0x20 +#define MA35_REG_RTC_LEAPYEAR 0x24 +#define MA35_REG_RTC_INTEN 0x28 +#define MA35_REG_RTC_INTSTS 0x2c + +/* register MA35_REG_RTC_INIT */ +#define RTC_INIT_ACTIVE BIT(0) +#define RTC_INIT_MAGIC_CODE 0xa5eb1357 + +/* register MA35_REG_RTC_CLKFMT */ +#define RTC_CLKFMT_24HEN BIT(0) +#define RTC_CLKFMT_DCOMPEN BIT(16) + +/* register MA35_REG_RTC_INTEN */ +#define RTC_INTEN_ALMIEN BIT(0) +#define RTC_INTEN_UIEN BIT(1) +#define RTC_INTEN_CLKFIEN BIT(24) +#define RTC_INTEN_CLKSTIEN BIT(25) + +/* register MA35_REG_RTC_INTSTS */ +#define RTC_INTSTS_ALMIF BIT(0) +#define RTC_INTSTS_UIF BIT(1) +#define RTC_INTSTS_CLKFIF BIT(24) +#define RTC_INTSTS_CLKSTIF BIT(25) + +#define RTC_INIT_TIMEOUT 250 + +struct ma35_rtc { + int irq_num; + void __iomem *rtc_reg; + struct rtc_device *rtcdev; +}; + +static u32 rtc_reg_read(struct ma35_rtc *p, u32 offset) +{ + return __raw_readl(p->rtc_reg + offset); +} + +static inline void rtc_reg_write(struct ma35_rtc *p, u32 offset, u32 value) +{ + __raw_writel(value, p->rtc_reg + offset); +} + +static irqreturn_t ma35d1_rtc_interrupt(int irq, void *data) +{ + struct ma35_rtc *rtc = (struct ma35_rtc *)data; + unsigned long events = 0, rtc_irq; + + rtc_irq = rtc_reg_read(rtc, MA35_REG_RTC_INTSTS); + + if (rtc_irq & RTC_INTSTS_ALMIF) { + rtc_reg_write(rtc, MA35_REG_RTC_INTSTS, RTC_INTSTS_ALMIF); + events |= RTC_AF | RTC_IRQF; + } + + rtc_update_irq(rtc->rtcdev, 1, events); + + return IRQ_HANDLED; +} + +static int ma35d1_rtc_init(struct ma35_rtc *rtc, u32 ms_timeout) +{ + 
const unsigned long timeout = jiffies + msecs_to_jiffies(ms_timeout); + + do { + if (rtc_reg_read(rtc, MA35_REG_RTC_INIT) & RTC_INIT_ACTIVE) + return 0; + + rtc_reg_write(rtc, MA35_REG_RTC_INIT, RTC_INIT_MAGIC_CODE); + + mdelay(1); + + } while (time_before(jiffies, timeout)); + + return -ETIMEDOUT; +} + +static int ma35d1_alarm_irq_enable(struct device *dev, u32 enabled) +{ + struct ma35_rtc *rtc = dev_get_drvdata(dev); + u32 reg_ien; + + reg_ien = rtc_reg_read(rtc, MA35_REG_RTC_INTEN); + + if (enabled) + rtc_reg_write(rtc, MA35_REG_RTC_INTEN, reg_ien | RTC_INTEN_ALMIEN); + else + rtc_reg_write(rtc, MA35_REG_RTC_INTEN, reg_ien & ~RTC_INTEN_ALMIEN); + + return 0; +} + +static int ma35d1_rtc_read_time(struct device *dev, struct rtc_time *tm) +{ + struct ma35_rtc *rtc = dev_get_drvdata(dev); + u32 time, cal, wday; + + do { + time = rtc_reg_read(rtc, MA35_REG_RTC_TIME); + cal = rtc_reg_read(rtc, MA35_REG_RTC_CAL); + wday = rtc_reg_read(rtc, MA35_REG_RTC_WEEKDAY); + } while (time != rtc_reg_read(rtc, MA35_REG_RTC_TIME) || + cal != rtc_reg_read(rtc, MA35_REG_RTC_CAL)); + + tm->tm_mday = bcd2bin(cal >> 0); + tm->tm_wday = wday; + tm->tm_mon = bcd2bin(cal >> 8); + tm->tm_mon = tm->tm_mon - 1; + tm->tm_year = bcd2bin(cal >> 16) + 100; + + tm->tm_sec = bcd2bin(time >> 0); + tm->tm_min = bcd2bin(time >> 8); + tm->tm_hour = bcd2bin(time >> 16); + + return rtc_valid_tm(tm); +} + +static int ma35d1_rtc_set_time(struct device *dev, struct rtc_time *tm) +{ + struct ma35_rtc *rtc = dev_get_drvdata(dev); + u32 val; + + val = bin2bcd(tm->tm_mday) << 0 | bin2bcd(tm->tm_mon + 1) << 8 | + bin2bcd(tm->tm_year - 100) << 16; + rtc_reg_write(rtc, MA35_REG_RTC_CAL, val); + + val = bin2bcd(tm->tm_sec) << 0 | bin2bcd(tm->tm_min) << 8 | + bin2bcd(tm->tm_hour) << 16; + rtc_reg_write(rtc, MA35_REG_RTC_TIME, val); + + val = tm->tm_wday; + rtc_reg_write(rtc, MA35_REG_RTC_WEEKDAY, val); + + return 0; +} + +static int ma35d1_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) +{ + struct 
ma35_rtc *rtc = dev_get_drvdata(dev); + u32 talm, calm; + + talm = rtc_reg_read(rtc, MA35_REG_RTC_TALM); + calm = rtc_reg_read(rtc, MA35_REG_RTC_CALM); + + alrm->time.tm_mday = bcd2bin(calm >> 0); + alrm->time.tm_mon = bcd2bin(calm >> 8); + alrm->time.tm_mon = alrm->time.tm_mon - 1; + + alrm->time.tm_year = bcd2bin(calm >> 16) + 100; + + alrm->time.tm_sec = bcd2bin(talm >> 0); + alrm->time.tm_min = bcd2bin(talm >> 8); + alrm->time.tm_hour = bcd2bin(talm >> 16); + + return rtc_valid_tm(&alrm->time); +} + +static int ma35d1_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) +{ + struct ma35_rtc *rtc = dev_get_drvdata(dev); + unsigned long val; + + val = bin2bcd(alrm->time.tm_mday) << 0 | bin2bcd(alrm->time.tm_mon + 1) << 8 | + bin2bcd(alrm->time.tm_year - 100) << 16; + rtc_reg_write(rtc, MA35_REG_RTC_CALM, val); + + val = bin2bcd(alrm->time.tm_sec) << 0 | bin2bcd(alrm->time.tm_min) << 8 | + bin2bcd(alrm->time.tm_hour) << 16; + rtc_reg_write(rtc, MA35_REG_RTC_TALM, val); + + ma35d1_alarm_irq_enable(dev, alrm->enabled); + + return 0; +} + +static const struct rtc_class_ops ma35d1_rtc_ops = { + .read_time = ma35d1_rtc_read_time, + .set_time = ma35d1_rtc_set_time, + .read_alarm = ma35d1_rtc_read_alarm, + .set_alarm = ma35d1_rtc_set_alarm, + .alarm_irq_enable = ma35d1_alarm_irq_enable, +}; + +static int ma35d1_rtc_probe(struct platform_device *pdev) +{ + struct ma35_rtc *rtc; + struct clk *clk; + int ret; + + rtc = devm_kzalloc(&pdev->dev, sizeof(*rtc), GFP_KERNEL); + if (!rtc) + return -ENOMEM; + + rtc->rtc_reg = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(rtc->rtc_reg)) + return PTR_ERR(rtc->rtc_reg); + + clk = of_clk_get(pdev->dev.of_node, 0); + if (IS_ERR(clk)) + return dev_err_probe(&pdev->dev, PTR_ERR(clk), "failed to find rtc clock\n"); + + ret = clk_prepare_enable(clk); + if (ret) + return ret; + + if (!(rtc_reg_read(rtc, MA35_REG_RTC_INIT) & RTC_INIT_ACTIVE)) { + ret = ma35d1_rtc_init(rtc, RTC_INIT_TIMEOUT); + if (ret) + return 
dev_err_probe(&pdev->dev, ret, "rtc init failed\n"); + } + + rtc->irq_num = platform_get_irq(pdev, 0); + + ret = devm_request_irq(&pdev->dev, rtc->irq_num, ma35d1_rtc_interrupt, + IRQF_NO_SUSPEND, "ma35d1rtc", rtc); + if (ret) + return dev_err_probe(&pdev->dev, ret, "Failed to request rtc irq\n"); + + platform_set_drvdata(pdev, rtc); + + device_init_wakeup(&pdev->dev, true); + + rtc->rtcdev = devm_rtc_allocate_device(&pdev->dev); + if (IS_ERR(rtc->rtcdev)) + return PTR_ERR(rtc->rtcdev); + + rtc->rtcdev->ops = &ma35d1_rtc_ops; + rtc->rtcdev->range_min = RTC_TIMESTAMP_BEGIN_2000; + rtc->rtcdev->range_max = RTC_TIMESTAMP_END_2099; + + ret = devm_rtc_register_device(rtc->rtcdev); + if (ret) + return dev_err_probe(&pdev->dev, ret, "Failed to register rtc device\n"); + + return 0; +} + +static int ma35d1_rtc_suspend(struct platform_device *pdev, pm_message_t state) +{ + struct ma35_rtc *rtc = platform_get_drvdata(pdev); + + if (device_may_wakeup(&pdev->dev)) + enable_irq_wake(rtc->irq_num); + + return 0; +} + +static int ma35d1_rtc_resume(struct platform_device *pdev) +{ + struct ma35_rtc *rtc = platform_get_drvdata(pdev); + + if (device_may_wakeup(&pdev->dev)) + disable_irq_wake(rtc->irq_num); + + return 0; +} + +static const struct of_device_id ma35d1_rtc_of_match[] = { + { .compatible = "nuvoton,ma35d1-rtc", }, + {}, +}; +MODULE_DEVICE_TABLE(of, ma35d1_rtc_of_match); + +static struct platform_driver ma35d1_rtc_driver = { + .suspend = ma35d1_rtc_suspend, + .resume = ma35d1_rtc_resume, + .probe = ma35d1_rtc_probe, + .driver = { + .name = "rtc-ma35d1", + .of_match_table = ma35d1_rtc_of_match, + }, +}; + +module_platform_driver(ma35d1_rtc_driver); + +MODULE_AUTHOR("Ming-Jen Chen "); +MODULE_DESCRIPTION("MA35D1 RTC driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/rtc/rtc-max31335.c b/drivers/rtc/rtc-max31335.c new file mode 100644 index 0000000000000000000000000000000000000000..402fda8fd54884eb8d1485ab456de696049dc510 --- /dev/null +++ b/drivers/rtc/rtc-max31335.c @@ 
-0,0 +1,697 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * RTC driver for the MAX31335 + * + * Copyright (C) 2023 Analog Devices + * + * Antoniu Miclaus + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* MAX31335 Register Map */ +#define MAX31335_STATUS1 0x00 +#define MAX31335_INT_EN1 0x01 +#define MAX31335_STATUS2 0x02 +#define MAX31335_INT_EN2 0x03 +#define MAX31335_RTC_RESET 0x04 +#define MAX31335_RTC_CONFIG 0x05 +#define MAX31335_RTC_CONFIG2 0x06 +#define MAX31335_TIMESTAMP_CONFIG 0x07 +#define MAX31335_TIMER_CONFIG 0x08 +#define MAX31335_SECONDS_1_128 0x09 +#define MAX31335_SECONDS 0x0A +#define MAX31335_MINUTES 0x0B +#define MAX31335_HOURS 0x0C +#define MAX31335_DAY 0x0D +#define MAX31335_DATE 0x0E +#define MAX31335_MONTH 0x0F +#define MAX31335_YEAR 0x0F +#define MAX31335_ALM1_SEC 0x11 +#define MAX31335_ALM1_MIN 0x12 +#define MAX31335_ALM1_HRS 0x13 +#define MAX31335_ALM1_DAY_DATE 0x14 +#define MAX31335_ALM1_MON 0x15 +#define MAX31335_ALM1_YEAR 0x16 +#define MAX31335_ALM2_MIN 0x17 +#define MAX31335_ALM2_HRS 0x18 +#define MAX31335_ALM2_DAY_DATE 0x19 +#define MAX31335_TIMER_COUNT 0x1A +#define MAX31335_TIMER_INIT 0x1B +#define MAX31335_PWR_MGMT 0x1C +#define MAX31335_TRICKLE_REG 0x1D +#define MAX31335_AGING_OFFSET 0x1E +#define MAX31335_TS_CONFIG 0x30 +#define MAX31335_TEMP_ALARM_HIGH_MSB 0x31 +#define MAX31335_TEMP_ALARM_HIGH_LSB 0x32 +#define MAX31335_TEMP_ALARM_LOW_MSB 0x33 +#define MAX31335_TEMP_ALARM_LOW_LSB 0x34 +#define MAX31335_TEMP_DATA_MSB 0x35 +#define MAX31335_TEMP_DATA_LSB 0x36 +#define MAX31335_TS0_SEC_1_128 0x40 +#define MAX31335_TS0_SEC 0x41 +#define MAX31335_TS0_MIN 0x42 +#define MAX31335_TS0_HOUR 0x43 +#define MAX31335_TS0_DATE 0x44 +#define MAX31335_TS0_MONTH 0x45 +#define MAX31335_TS0_YEAR 0x46 +#define MAX31335_TS0_FLAGS 0x47 +#define MAX31335_TS1_SEC_1_128 0x48 +#define MAX31335_TS1_SEC 0x49 +#define MAX31335_TS1_MIN 0x4A 
+#define MAX31335_TS1_HOUR 0x4B +#define MAX31335_TS1_DATE 0x4C +#define MAX31335_TS1_MONTH 0x4D +#define MAX31335_TS1_YEAR 0x4E +#define MAX31335_TS1_FLAGS 0x4F +#define MAX31335_TS2_SEC_1_128 0x50 +#define MAX31335_TS2_SEC 0x51 +#define MAX31335_TS2_MIN 0x52 +#define MAX31335_TS2_HOUR 0x53 +#define MAX31335_TS2_DATE 0x54 +#define MAX31335_TS2_MONTH 0x55 +#define MAX31335_TS2_YEAR 0x56 +#define MAX31335_TS2_FLAGS 0x57 +#define MAX31335_TS3_SEC_1_128 0x58 +#define MAX31335_TS3_SEC 0x59 +#define MAX31335_TS3_MIN 0x5A +#define MAX31335_TS3_HOUR 0x5B +#define MAX31335_TS3_DATE 0x5C +#define MAX31335_TS3_MONTH 0x5D +#define MAX31335_TS3_YEAR 0x5E +#define MAX31335_TS3_FLAGS 0x5F + +/* MAX31335_STATUS1 Bit Definitions */ +#define MAX31335_STATUS1_PSDECT BIT(7) +#define MAX31335_STATUS1_OSF BIT(6) +#define MAX31335_STATUS1_PFAIL BIT(5) +#define MAX31335_STATUS1_VBATLOW BIT(4) +#define MAX31335_STATUS1_DIF BIT(3) +#define MAX31335_STATUS1_TIF BIT(2) +#define MAX31335_STATUS1_A2F BIT(1) +#define MAX31335_STATUS1_A1F BIT(0) + +/* MAX31335_INT_EN1 Bit Definitions */ +#define MAX31335_INT_EN1_DOSF BIT(6) +#define MAX31335_INT_EN1_PFAILE BIT(5) +#define MAX31335_INT_EN1_VBATLOWE BIT(4) +#define MAX31335_INT_EN1_DIE BIT(3) +#define MAX31335_INT_EN1_TIE BIT(2) +#define MAX31335_INT_EN1_A2IE BIT(1) +#define MAX31335_INT_EN1_A1IE BIT(0) + +/* MAX31335_STATUS2 Bit Definitions */ +#define MAX31335_STATUS2_TEMP_RDY BIT(2) +#define MAX31335_STATUS2_OTF BIT(1) +#define MAX31335_STATUS2_UTF BIT(0) + +/* MAX31335_INT_EN2 Bit Definitions */ +#define MAX31335_INT_EN2_TEMP_RDY_EN BIT(2) +#define MAX31335_INT_EN2_OTIE BIT(1) +#define MAX31335_INT_EN2_UTIE BIT(0) + +/* MAX31335_RTC_RESET Bit Definitions */ +#define MAX31335_RTC_RESET_SWRST BIT(0) + +/* MAX31335_RTC_CONFIG1 Bit Definitions */ +#define MAX31335_RTC_CONFIG1_EN_IO BIT(6) +#define MAX31335_RTC_CONFIG1_A1AC GENMASK(5, 4) +#define MAX31335_RTC_CONFIG1_DIP BIT(3) +#define MAX31335_RTC_CONFIG1_I2C_TIMEOUT BIT(1) +#define 
MAX31335_RTC_CONFIG1_EN_OSC BIT(0) + +/* MAX31335_RTC_CONFIG2 Bit Definitions */ +#define MAX31335_RTC_CONFIG2_ENCLKO BIT(2) +#define MAX31335_RTC_CONFIG2_CLKO_HZ GENMASK(1, 0) + +/* MAX31335_TIMESTAMP_CONFIG Bit Definitions */ +#define MAX31335_TIMESTAMP_CONFIG_TSVLOW BIT(5) +#define MAX31335_TIMESTAMP_CONFIG_TSPWM BIT(4) +#define MAX31335_TIMESTAMP_CONFIG_TSDIN BIT(3) +#define MAX31335_TIMESTAMP_CONFIG_TSOW BIT(2) +#define MAX31335_TIMESTAMP_CONFIG_TSR BIT(1) +#define MAX31335_TIMESTAMP_CONFIG_TSE BIT(0) + +/* MAX31335_TIMER_CONFIG Bit Definitions */ +#define MAX31335_TIMER_CONFIG_TE BIT(4) +#define MAX31335_TIMER_CONFIG_TPAUSE BIT(3) +#define MAX31335_TIMER_CONFIG_TRPT BIT(2) +#define MAX31335_TIMER_CONFIG_TFS GENMASK(1, 0) + +/* MAX31335_HOURS Bit Definitions */ +#define MAX31335_HOURS_F_24_12 BIT(6) +#define MAX31335_HOURS_HR_20_AM_PM BIT(5) + +/* MAX31335_MONTH Bit Definitions */ +#define MAX31335_MONTH_CENTURY BIT(7) + +/* MAX31335_PWR_MGMT Bit Definitions */ +#define MAX31335_PWR_MGMT_PFVT BIT(0) + +/* MAX31335_TRICKLE_REG Bit Definitions */ +#define MAX31335_TRICKLE_REG_TRICKLE GENMASK(3, 1) +#define MAX31335_TRICKLE_REG_EN_TRICKLE BIT(0) + +/* MAX31335_TS_CONFIG Bit Definitions */ +#define MAX31335_TS_CONFIG_AUTO BIT(4) +#define MAX31335_TS_CONFIG_CONVERT_T BIT(3) +#define MAX31335_TS_CONFIG_TSINT GENMASK(2, 0) + +/* MAX31335_TS_FLAGS Bit Definitions */ +#define MAX31335_TS_FLAGS_VLOWF BIT(3) +#define MAX31335_TS_FLAGS_VBATF BIT(2) +#define MAX31335_TS_FLAGS_VCCF BIT(1) +#define MAX31335_TS_FLAGS_DINF BIT(0) + +/* MAX31335 Miscellaneous Definitions */ +#define MAX31335_TRICKLE_SCHOTTKY_DIODE 1 +#define MAX31335_TRICKLE_STANDARD_DIODE 4 +#define MAX31335_RAM_SIZE 32 +#define MAX31335_TIME_SIZE 0x07 + +#define clk_hw_to_max31335(_hw) container_of(_hw, struct max31335_data, clkout) + +struct max31335_data { + struct regmap *regmap; + struct rtc_device *rtc; + struct clk_hw clkout; +}; + +static const int max31335_clkout_freq[] = { 1, 64, 1024, 32768 }; + 
+static const u16 max31335_trickle_resistors[] = {3000, 6000, 11000}; + +static bool max31335_volatile_reg(struct device *dev, unsigned int reg) +{ + /* time keeping registers */ + if (reg >= MAX31335_SECONDS && + reg < MAX31335_SECONDS + MAX31335_TIME_SIZE) + return true; + + /* interrupt status register: match the register address, not a bit mask */ + if (reg == MAX31335_STATUS1) + return true; + + /* temperature registers */ + if (reg == MAX31335_TEMP_DATA_MSB || reg == MAX31335_TEMP_DATA_LSB) + return true; + + return false; +} + +static const struct regmap_config regmap_config = { + .reg_bits = 8, + .val_bits = 8, + .max_register = 0x5F, + .volatile_reg = max31335_volatile_reg, +}; + +static int max31335_read_time(struct device *dev, struct rtc_time *tm) +{ + struct max31335_data *max31335 = dev_get_drvdata(dev); + u8 date[7]; + int ret; + + ret = regmap_bulk_read(max31335->regmap, MAX31335_SECONDS, date, + sizeof(date)); + if (ret) + return ret; + + tm->tm_sec = bcd2bin(date[0] & 0x7f); + tm->tm_min = bcd2bin(date[1] & 0x7f); + tm->tm_hour = bcd2bin(date[2] & 0x3f); + tm->tm_wday = bcd2bin(date[3] & 0x7) - 1; + tm->tm_mday = bcd2bin(date[4] & 0x3f); + tm->tm_mon = bcd2bin(date[5] & 0x1f) - 1; + tm->tm_year = bcd2bin(date[6]) + 100; + + if (FIELD_GET(MAX31335_MONTH_CENTURY, date[5])) + tm->tm_year += 100; + + return 0; +} + +static int max31335_set_time(struct device *dev, struct rtc_time *tm) +{ + struct max31335_data *max31335 = dev_get_drvdata(dev); + u8 date[7]; + + date[0] = bin2bcd(tm->tm_sec); + date[1] = bin2bcd(tm->tm_min); + date[2] = bin2bcd(tm->tm_hour); + date[3] = bin2bcd(tm->tm_wday + 1); + date[4] = bin2bcd(tm->tm_mday); + date[5] = bin2bcd(tm->tm_mon + 1); + date[6] = bin2bcd(tm->tm_year % 100); + + if (tm->tm_year >= 200) + date[5] |= FIELD_PREP(MAX31335_MONTH_CENTURY, 1); + + return regmap_bulk_write(max31335->regmap, MAX31335_SECONDS, date, + sizeof(date)); +} + +static int max31335_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) +{ + struct max31335_data *max31335 = 
dev_get_drvdata(dev); + int ret, ctrl, status; + struct rtc_time time; + u8 regs[6]; + + ret = regmap_bulk_read(max31335->regmap, MAX31335_ALM1_SEC, regs, + sizeof(regs)); + if (ret) + return ret; + + alrm->time.tm_sec = bcd2bin(regs[0] & 0x7f); + alrm->time.tm_min = bcd2bin(regs[1] & 0x7f); + alrm->time.tm_hour = bcd2bin(regs[2] & 0x3f); + alrm->time.tm_mday = bcd2bin(regs[3] & 0x3f); + alrm->time.tm_mon = bcd2bin(regs[4] & 0x1f) - 1; + alrm->time.tm_year = bcd2bin(regs[5]) + 100; + + ret = max31335_read_time(dev, &time); + if (ret) + return ret; + + if (time.tm_year >= 200) + alrm->time.tm_year += 100; + + ret = regmap_read(max31335->regmap, MAX31335_INT_EN1, &ctrl); + if (ret) + return ret; + + ret = regmap_read(max31335->regmap, MAX31335_STATUS1, &status); + if (ret) + return ret; + + alrm->enabled = FIELD_GET(MAX31335_INT_EN1_A1IE, ctrl); + alrm->pending = FIELD_GET(MAX31335_STATUS1_A1F, status); + + return 0; +} + +static int max31335_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) +{ + struct max31335_data *max31335 = dev_get_drvdata(dev); + unsigned int reg; + u8 regs[6]; + int ret; + + regs[0] = bin2bcd(alrm->time.tm_sec); + regs[1] = bin2bcd(alrm->time.tm_min); + regs[2] = bin2bcd(alrm->time.tm_hour); + regs[3] = bin2bcd(alrm->time.tm_mday); + regs[4] = bin2bcd(alrm->time.tm_mon + 1); + regs[5] = bin2bcd(alrm->time.tm_year % 100); + + ret = regmap_bulk_write(max31335->regmap, MAX31335_ALM1_SEC, + regs, sizeof(regs)); + if (ret) + return ret; + + reg = FIELD_PREP(MAX31335_INT_EN1_A1IE, alrm->enabled); + ret = regmap_update_bits(max31335->regmap, MAX31335_INT_EN1, + MAX31335_INT_EN1_A1IE, reg); + if (ret) + return ret; + + ret = regmap_update_bits(max31335->regmap, MAX31335_STATUS1, + MAX31335_STATUS1_A1F, 0); + + return 0; +} + +static int max31335_alarm_irq_enable(struct device *dev, unsigned int enabled) +{ + struct max31335_data *max31335 = dev_get_drvdata(dev); + + return regmap_update_bits(max31335->regmap, MAX31335_INT_EN1, + 
MAX31335_INT_EN1_A1IE, enabled); +} + +static irqreturn_t max31335_handle_irq(int irq, void *dev_id) +{ + struct max31335_data *max31335 = dev_id; + bool status; + int ret; + + ret = regmap_update_bits_check(max31335->regmap, MAX31335_STATUS1, + MAX31335_STATUS1_A1F, 0, &status); + if (ret) + return IRQ_HANDLED; + + if (status) + rtc_update_irq(max31335->rtc, 1, RTC_AF | RTC_IRQF); + + return IRQ_HANDLED; +} + +static const struct rtc_class_ops max31335_rtc_ops = { + .read_time = max31335_read_time, + .set_time = max31335_set_time, + .read_alarm = max31335_read_alarm, + .set_alarm = max31335_set_alarm, + .alarm_irq_enable = max31335_alarm_irq_enable, +}; + +static int max31335_trickle_charger_setup(struct device *dev, + struct max31335_data *max31335) +{ + u32 ohms, chargeable; + int i, trickle_cfg; + const char *diode; + + if (device_property_read_u32(dev, "aux-voltage-chargeable", + &chargeable)) + return 0; + + if (device_property_read_u32(dev, "trickle-resistor-ohms", &ohms)) + return 0; + + if (device_property_read_string(dev, "adi,tc-diode", &diode)) + return 0; + + if (!strcmp(diode, "schottky")) + trickle_cfg = MAX31335_TRICKLE_SCHOTTKY_DIODE; + else if (!strcmp(diode, "standard+schottky")) + trickle_cfg = MAX31335_TRICKLE_STANDARD_DIODE; + else + return dev_err_probe(dev, -EINVAL, + "Invalid tc-diode value: %s\n", diode); + + for (i = 0; i < ARRAY_SIZE(max31335_trickle_resistors); i++) + if (ohms == max31335_trickle_resistors[i]) + break; + + if (i >= ARRAY_SIZE(max31335_trickle_resistors)) + return 0; + + i = i + trickle_cfg; + + return regmap_write(max31335->regmap, MAX31335_TRICKLE_REG, + FIELD_PREP(MAX31335_TRICKLE_REG_TRICKLE, i) | + FIELD_PREP(MAX31335_TRICKLE_REG_EN_TRICKLE, + chargeable)); +} + +static unsigned long max31335_clkout_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct max31335_data *max31335 = clk_hw_to_max31335(hw); + unsigned int freq_mask; + unsigned int reg; + int ret; + + ret = regmap_read(max31335->regmap, 
MAX31335_RTC_CONFIG2, ®); + if (ret) + return 0; + + freq_mask = __roundup_pow_of_two(ARRAY_SIZE(max31335_clkout_freq)) - 1; + + return max31335_clkout_freq[reg & freq_mask]; +} + +static long max31335_clkout_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *prate) +{ + int index; + + index = find_closest(rate, max31335_clkout_freq, + ARRAY_SIZE(max31335_clkout_freq)); + + return max31335_clkout_freq[index]; +} + +static int max31335_clkout_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + struct max31335_data *max31335 = clk_hw_to_max31335(hw); + unsigned int freq_mask; + int index; + + index = find_closest(rate, max31335_clkout_freq, + ARRAY_SIZE(max31335_clkout_freq)); + freq_mask = __roundup_pow_of_two(ARRAY_SIZE(max31335_clkout_freq)) - 1; + + return regmap_update_bits(max31335->regmap, MAX31335_RTC_CONFIG2, + freq_mask, index); +} + +static int max31335_clkout_enable(struct clk_hw *hw) +{ + struct max31335_data *max31335 = clk_hw_to_max31335(hw); + + return regmap_set_bits(max31335->regmap, MAX31335_RTC_CONFIG2, + MAX31335_RTC_CONFIG2_ENCLKO); +} + +static void max31335_clkout_disable(struct clk_hw *hw) +{ + struct max31335_data *max31335 = clk_hw_to_max31335(hw); + + regmap_clear_bits(max31335->regmap, MAX31335_RTC_CONFIG2, + MAX31335_RTC_CONFIG2_ENCLKO); +} + +static int max31335_clkout_is_enabled(struct clk_hw *hw) +{ + struct max31335_data *max31335 = clk_hw_to_max31335(hw); + unsigned int reg; + int ret; + + ret = regmap_read(max31335->regmap, MAX31335_RTC_CONFIG2, ®); + if (ret) + return ret; + + return !!(reg & MAX31335_RTC_CONFIG2_ENCLKO); +} + +static const struct clk_ops max31335_clkout_ops = { + .recalc_rate = max31335_clkout_recalc_rate, + .round_rate = max31335_clkout_round_rate, + .set_rate = max31335_clkout_set_rate, + .enable = max31335_clkout_enable, + .disable = max31335_clkout_disable, + .is_enabled = max31335_clkout_is_enabled, +}; + +static struct clk_init_data max31335_clk_init = { + .name 
= "max31335-clkout", + .ops = &max31335_clkout_ops, +}; + +static int max31335_nvmem_reg_read(void *priv, unsigned int offset, + void *val, size_t bytes) +{ + struct max31335_data *max31335 = priv; + unsigned int reg = MAX31335_TS0_SEC_1_128 + offset; + + return regmap_bulk_read(max31335->regmap, reg, val, bytes); +} + +static int max31335_nvmem_reg_write(void *priv, unsigned int offset, + void *val, size_t bytes) +{ + struct max31335_data *max31335 = priv; + unsigned int reg = MAX31335_TS0_SEC_1_128 + offset; + + return regmap_bulk_write(max31335->regmap, reg, val, bytes); +} + +static struct nvmem_config max31335_nvmem_cfg = { + .reg_read = max31335_nvmem_reg_read, + .reg_write = max31335_nvmem_reg_write, + .word_size = 8, + .size = MAX31335_RAM_SIZE, +}; + +#if IS_REACHABLE(CONFIG_HWMON) /* Kconfig symbols need the CONFIG_ prefix */ +static int max31335_read_temp(struct device *dev, enum hwmon_sensor_types type, + u32 attr, int channel, long *val) +{ + struct max31335_data *max31335 = dev_get_drvdata(dev); + u8 reg[2]; + s16 temp; + int ret; + + if (type != hwmon_temp || attr != hwmon_temp_input) + return -EOPNOTSUPP; + + ret = regmap_bulk_read(max31335->regmap, MAX31335_TEMP_DATA_MSB, + reg, 2); + if (ret) + return ret; + + temp = get_unaligned_be16(reg); + + *val = (temp / 64) * 250; + + return 0; +} + +static umode_t max31335_is_visible(const void *data, + enum hwmon_sensor_types type, + u32 attr, int channel) +{ + if (type == hwmon_temp && attr == hwmon_temp_input) + return 0444; + + return 0; +} + +static const struct hwmon_channel_info *max31335_info[] = { + HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT), + NULL +}; + +static const struct hwmon_ops max31335_hwmon_ops = { + .is_visible = max31335_is_visible, + .read = max31335_read_temp, +}; + +static const struct hwmon_chip_info max31335_chip_info = { + .ops = &max31335_hwmon_ops, + .info = max31335_info, +}; +#endif + +static int max31335_clkout_register(struct device *dev) +{ + struct max31335_data *max31335 = dev_get_drvdata(dev); + int ret; + + if 
(!device_property_present(dev, "#clock-cells")) + return regmap_clear_bits(max31335->regmap, MAX31335_RTC_CONFIG2, + MAX31335_RTC_CONFIG2_ENCLKO); + + max31335->clkout.init = &max31335_clk_init; + + ret = devm_clk_hw_register(dev, &max31335->clkout); + if (ret) + return dev_err_probe(dev, ret, "cannot register clock\n"); + + ret = devm_of_clk_add_hw_provider(dev, of_clk_hw_simple_get, + &max31335->clkout); + if (ret) + return dev_err_probe(dev, ret, "cannot add hw provider\n"); + + max31335->clkout.clk = devm_clk_get_enabled(dev, NULL); + if (IS_ERR(max31335->clkout.clk)) + return dev_err_probe(dev, PTR_ERR(max31335->clkout.clk), + "cannot enable clkout\n"); + + return 0; +} + +static int max31335_probe(struct i2c_client *client) +{ + struct max31335_data *max31335; +#if IS_REACHABLE(CONFIG_HWMON) + struct device *hwmon; +#endif + int ret; + + max31335 = devm_kzalloc(&client->dev, sizeof(*max31335), GFP_KERNEL); + if (!max31335) + return -ENOMEM; + + max31335->regmap = devm_regmap_init_i2c(client, &regmap_config); + if (IS_ERR(max31335->regmap)) + return PTR_ERR(max31335->regmap); + + i2c_set_clientdata(client, max31335); + + max31335->rtc = devm_rtc_allocate_device(&client->dev); + if (IS_ERR(max31335->rtc)) + return PTR_ERR(max31335->rtc); + + max31335->rtc->ops = &max31335_rtc_ops; + max31335->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; + max31335->rtc->range_max = RTC_TIMESTAMP_END_2199; + max31335->rtc->alarm_offset_max = 24 * 60 * 60; + + ret = max31335_clkout_register(&client->dev); + if (ret) + return ret; + + if (client->irq > 0) { + ret = devm_request_threaded_irq(&client->dev, client->irq, + NULL, max31335_handle_irq, + IRQF_ONESHOT, + "max31335", max31335); + if (ret) { + dev_warn(&client->dev, + "unable to request IRQ, alarm max31335 disabled\n"); + client->irq = 0; + } + } + + if (!client->irq) + clear_bit(RTC_FEATURE_ALARM, max31335->rtc->features); + + max31335_nvmem_cfg.priv = max31335; + ret = devm_rtc_nvmem_register(max31335->rtc, &max31335_nvmem_cfg); + if 
(ret) + return dev_err_probe(&client->dev, ret, + "cannot register rtc nvmem\n"); + +#if IS_REACHABLE(CONFIG_HWMON) + hwmon = devm_hwmon_device_register_with_info(&client->dev, client->name, + max31335, + &max31335_chip_info, + NULL); + if (IS_ERR(hwmon)) + return dev_err_probe(&client->dev, PTR_ERR(hwmon), + "cannot register hwmon device\n"); +#endif + + ret = max31335_trickle_charger_setup(&client->dev, max31335); + if (ret) + return ret; + + return devm_rtc_register_device(max31335->rtc); +} + +static const struct i2c_device_id max31335_id[] = { + { "max31335", 0 }, + { } +}; + +MODULE_DEVICE_TABLE(i2c, max31335_id); + +static const struct of_device_id max31335_of_match[] = { + { .compatible = "adi,max31335" }, + { } +}; + +MODULE_DEVICE_TABLE(of, max31335_of_match); + +static struct i2c_driver max31335_driver = { + .driver = { + .name = "rtc-max31335", + .of_match_table = max31335_of_match, + }, + .probe = max31335_probe, + .id_table = max31335_id, +}; +module_i2c_driver(max31335_driver); + +MODULE_AUTHOR("Antoniu Miclaus "); +MODULE_DESCRIPTION("MAX31335 RTC driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c index f1c09f1db044c8481fca7ddb8801c952954e246e..651bf3c279c7462151096cf8565a56fffab9052d 100644 --- a/drivers/rtc/rtc-mc146818-lib.c +++ b/drivers/rtc/rtc-mc146818-lib.c @@ -8,26 +8,31 @@ #include #endif +#define UIP_RECHECK_DELAY 100 /* usec */ +#define UIP_RECHECK_DELAY_MS (USEC_PER_MSEC / UIP_RECHECK_DELAY) +#define UIP_RECHECK_LOOPS_MS(x) ((x) / UIP_RECHECK_DELAY_MS) + /* * Execute a function while the UIP (Update-in-progress) bit of the RTC is - * unset. + * unset. The timeout is configurable by the caller in ms. * * Warning: callback may be executed more then once. 
*/ bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param), + int timeout, void *param) { int i; unsigned long flags; unsigned char seconds; - for (i = 0; i < 100; i++) { + for (i = 0; UIP_RECHECK_LOOPS_MS(i) < timeout; i++) { spin_lock_irqsave(&rtc_lock, flags); /* * Check whether there is an update in progress during which the * readout is unspecified. The maximum update time is ~2ms. Poll - * every 100 usec for completion. + * for completion. * * Store the second value before checking UIP so a long lasting * NMI which happens to hit after the UIP check cannot make @@ -37,7 +42,7 @@ bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param), if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) { spin_unlock_irqrestore(&rtc_lock, flags); - udelay(100); + udelay(UIP_RECHECK_DELAY); continue; } @@ -56,7 +61,7 @@ bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param), */ if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) { spin_unlock_irqrestore(&rtc_lock, flags); - udelay(100); + udelay(UIP_RECHECK_DELAY); continue; } @@ -72,6 +77,10 @@ bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param), } spin_unlock_irqrestore(&rtc_lock, flags); + if (UIP_RECHECK_LOOPS_MS(i) >= 100) + pr_warn("Reading current time from RTC took around %li ms\n", + UIP_RECHECK_LOOPS_MS(i)); + return true; } return false; @@ -84,7 +93,7 @@ EXPORT_SYMBOL_GPL(mc146818_avoid_UIP); */ bool mc146818_does_rtc_work(void) { - return mc146818_avoid_UIP(NULL, NULL); + return mc146818_avoid_UIP(NULL, 1000, NULL); } EXPORT_SYMBOL_GPL(mc146818_does_rtc_work); @@ -130,15 +139,27 @@ static void mc146818_get_time_callback(unsigned char seconds, void *param_in) p->ctrl = CMOS_READ(RTC_CONTROL); } -int mc146818_get_time(struct rtc_time *time) +/** + * mc146818_get_time - Get the current time from the RTC + * @time: pointer to struct rtc_time to store the current time + * @timeout: timeout value in ms + * + * This function reads the current time from 
the RTC and stores it in the + * provided struct rtc_time. The timeout parameter specifies the maximum + * time to wait for the RTC to become ready. + * + * Return: 0 on success, -ETIMEDOUT if the RTC did not become ready within + * the specified timeout, or another error code if an error occurred. + */ +int mc146818_get_time(struct rtc_time *time, int timeout) { struct mc146818_get_time_callback_param p = { .time = time }; - if (!mc146818_avoid_UIP(mc146818_get_time_callback, &p)) { + if (!mc146818_avoid_UIP(mc146818_get_time_callback, timeout, &p)) { memset(time, 0, sizeof(*time)); - return -EIO; + return -ETIMEDOUT; } if (!(p.ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD) diff --git a/drivers/rtc/rtc-nct3018y.c b/drivers/rtc/rtc-nct3018y.c index ed4e606be8e58d2779bd6aff148b388435770830..f488a189a4651ffead1bbcc1e4e9d3a5cd01d5df 100644 --- a/drivers/rtc/rtc-nct3018y.c +++ b/drivers/rtc/rtc-nct3018y.c @@ -23,6 +23,7 @@ #define NCT3018Y_REG_CTRL 0x0A /* timer control */ #define NCT3018Y_REG_ST 0x0B /* status */ #define NCT3018Y_REG_CLKO 0x0C /* clock out */ +#define NCT3018Y_REG_PART 0x21 /* part info */ #define NCT3018Y_BIT_AF BIT(7) #define NCT3018Y_BIT_ST BIT(7) @@ -37,10 +38,12 @@ #define NCT3018Y_REG_BAT_MASK 0x07 #define NCT3018Y_REG_CLKO_F_MASK 0x03 /* frequenc mask */ #define NCT3018Y_REG_CLKO_CKE 0x80 /* clock out enabled */ +#define NCT3018Y_REG_PART_NCT3018Y 0x02 struct nct3018y { struct rtc_device *rtc; struct i2c_client *client; + int part_num; #ifdef CONFIG_COMMON_CLK struct clk_hw clkout_hw; #endif @@ -177,8 +180,27 @@ static int nct3018y_rtc_read_time(struct device *dev, struct rtc_time *tm) static int nct3018y_rtc_set_time(struct device *dev, struct rtc_time *tm) { struct i2c_client *client = to_i2c_client(dev); + struct nct3018y *nct3018y = dev_get_drvdata(dev); unsigned char buf[4] = {0}; - int err; + int err, flags; + int restore_flags = 0; + + flags = i2c_smbus_read_byte_data(client, NCT3018Y_REG_CTRL); + if (flags < 0) { + dev_dbg(&client->dev, 
"Failed to read NCT3018Y_REG_CTRL.\n"); + return flags; + } + + /* Check and set TWO bit */ + if (nct3018y->part_num == NCT3018Y_REG_PART_NCT3018Y && !(flags & NCT3018Y_BIT_TWO)) { + restore_flags = 1; + flags |= NCT3018Y_BIT_TWO; + err = i2c_smbus_write_byte_data(client, NCT3018Y_REG_CTRL, flags); + if (err < 0) { + dev_dbg(&client->dev, "Unable to write NCT3018Y_REG_CTRL.\n"); + return err; + } + } buf[0] = bin2bcd(tm->tm_sec); err = i2c_smbus_write_byte_data(client, NCT3018Y_REG_SC, buf[0]); @@ -212,6 +234,18 @@ static int nct3018y_rtc_set_time(struct device *dev, struct rtc_time *tm) return -EIO; } + /* Restore TWO bit */ + if (restore_flags) { + if (nct3018y->part_num == NCT3018Y_REG_PART_NCT3018Y) + flags &= ~NCT3018Y_BIT_TWO; + + err = i2c_smbus_write_byte_data(client, NCT3018Y_REG_CTRL, flags); + if (err < 0) { + dev_dbg(&client->dev, "Unable to write NCT3018Y_REG_CTRL.\n"); + return err; + } + } + return err; } @@ -479,11 +513,17 @@ static int nct3018y_probe(struct i2c_client *client) dev_dbg(&client->dev, "%s: NCT3018Y_BIT_TWO is set\n", __func__); } - flags = NCT3018Y_BIT_TWO; - err = i2c_smbus_write_byte_data(client, NCT3018Y_REG_CTRL, flags); - if (err < 0) { - dev_dbg(&client->dev, "Unable to write NCT3018Y_REG_CTRL\n"); - return err; + nct3018y->part_num = i2c_smbus_read_byte_data(client, NCT3018Y_REG_PART); + if (nct3018y->part_num < 0) { + dev_dbg(&client->dev, "Failed to read NCT3018Y_REG_PART.\n"); + return nct3018y->part_num; + } else if (nct3018y->part_num == NCT3018Y_REG_PART_NCT3018Y) { + flags = NCT3018Y_BIT_HF; + err = i2c_smbus_write_byte_data(client, NCT3018Y_REG_CTRL, flags); + if (err < 0) { + dev_dbg(&client->dev, "Unable to write NCT3018Y_REG_CTRL.\n"); + return err; + } } flags = 0; diff --git a/drivers/rtc/rtc-rv8803.c b/drivers/rtc/rtc-rv8803.c index 1a3ec1bb5b8148dd945ad127f45e657cd4c5a882..1327251e527c21cae4a247a62791ec3cbf0df067 100644 --- a/drivers/rtc/rtc-rv8803.c +++ b/drivers/rtc/rtc-rv8803.c @@ -17,6 +17,7 @@ #include 
#include #include +#include #define RV8803_I2C_TRY_COUNT 4 @@ -607,6 +608,28 @@ static int rv8803_regs_configure(struct rv8803_data *rv8803) return 0; } +static int rv8803_resume(struct device *dev) +{ + struct rv8803_data *rv8803 = dev_get_drvdata(dev); + + if (rv8803->client->irq > 0 && device_may_wakeup(dev)) + disable_irq_wake(rv8803->client->irq); + + return 0; +} + +static int rv8803_suspend(struct device *dev) +{ + struct rv8803_data *rv8803 = dev_get_drvdata(dev); + + if (rv8803->client->irq > 0 && device_may_wakeup(dev)) + enable_irq_wake(rv8803->client->irq); + + return 0; +} + +static DEFINE_SIMPLE_DEV_PM_OPS(rv8803_pm_ops, rv8803_suspend, rv8803_resume); + static const struct i2c_device_id rv8803_id[] = { { "rv8803", rv_8803 }, { "rv8804", rx_8804 }, @@ -683,10 +706,18 @@ static int rv8803_probe(struct i2c_client *client) if (err) { dev_warn(&client->dev, "unable to request IRQ, alarms disabled\n"); client->irq = 0; + } else { + device_init_wakeup(&client->dev, true); + err = dev_pm_set_wake_irq(&client->dev, client->irq); + if (err) + dev_err(&client->dev, "failed to set wake IRQ\n"); } + } else { + if (device_property_read_bool(&client->dev, "wakeup-source")) + device_init_wakeup(&client->dev, true); + else + clear_bit(RTC_FEATURE_ALARM, rv8803->rtc->features); } - if (!client->irq) - clear_bit(RTC_FEATURE_ALARM, rv8803->rtc->features); if (of_property_read_bool(client->dev.of_node, "epson,vdet-disable")) rv8803->backup |= RX8900_FLAG_VDETOFF; @@ -737,6 +768,7 @@ static struct i2c_driver rv8803_driver = { .driver = { .name = "rtc-rv8803", .of_match_table = of_match_ptr(rv8803_of_match), + .pm = &rv8803_pm_ops, }, .probe = rv8803_probe, .id_table = rv8803_id, diff --git a/drivers/rtc/rtc-tps6594.c b/drivers/rtc/rtc-tps6594.c new file mode 100644 index 0000000000000000000000000000000000000000..838ae8562a351f31f0e32d51bf751f184a92f491 --- /dev/null +++ b/drivers/rtc/rtc-tps6594.c @@ -0,0 +1,454 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * RTC driver 
for tps6594 PMIC + * + * Copyright (C) 2023 BayLibre Incorporated - https://www.baylibre.com/ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +// Total number of RTC registers needed to set time +#define NUM_TIME_REGS (TPS6594_REG_RTC_WEEKS - TPS6594_REG_RTC_SECONDS + 1) + +// Total number of RTC alarm registers +#define NUM_TIME_ALARM_REGS (NUM_TIME_REGS - 1) + +/* + * Min and max values supported by 'offset' interface (swapped sign). + * After conversion, the values do not exceed the range [-32767, 33767] + * which COMP_REG must conform to. + */ +#define MIN_OFFSET (-277774) +#define MAX_OFFSET (277774) + +// Number of ticks per hour +#define TICKS_PER_HOUR (32768 * 3600) + +// Multiplier for ppb conversions +#define PPB_MULT NANO + +static int tps6594_rtc_alarm_irq_enable(struct device *dev, + unsigned int enabled) +{ + struct tps6594 *tps = dev_get_drvdata(dev->parent); + u8 val; + + val = enabled ? TPS6594_BIT_IT_ALARM : 0; + + return regmap_update_bits(tps->regmap, TPS6594_REG_RTC_INTERRUPTS, + TPS6594_BIT_IT_ALARM, val); +} + +/* Pulse GET_TIME field of RTC_CTRL_1 to store a timestamp in shadow registers. */ +static int tps6594_rtc_shadow_timestamp(struct device *dev, struct tps6594 *tps) +{ + int ret; + + /* + * Set GET_TIME to 0. Next time we set GET_TIME to 1 we will be sure to store + * an up-to-date timestamp. + */ + ret = regmap_clear_bits(tps->regmap, TPS6594_REG_RTC_CTRL_1, + TPS6594_BIT_GET_TIME); + if (ret < 0) + return ret; + + /* + * Copy content of RTC registers to shadow registers or latches to read + * a coherent timestamp. + */ + return regmap_set_bits(tps->regmap, TPS6594_REG_RTC_CTRL_1, + TPS6594_BIT_GET_TIME); +} + +static int tps6594_rtc_read_time(struct device *dev, struct rtc_time *tm) +{ + unsigned char rtc_data[NUM_TIME_REGS]; + struct tps6594 *tps = dev_get_drvdata(dev->parent); + int ret; + + // Check if RTC is running. 
+ ret = regmap_test_bits(tps->regmap, TPS6594_REG_RTC_STATUS, + TPS6594_BIT_RUN); + if (ret < 0) + return ret; + if (ret == 0) + return -EINVAL; + + ret = tps6594_rtc_shadow_timestamp(dev, tps); + if (ret < 0) + return ret; + + // Read shadowed RTC registers. + ret = regmap_bulk_read(tps->regmap, TPS6594_REG_RTC_SECONDS, rtc_data, + NUM_TIME_REGS); + if (ret < 0) + return ret; + + tm->tm_sec = bcd2bin(rtc_data[0]); + tm->tm_min = bcd2bin(rtc_data[1]); + tm->tm_hour = bcd2bin(rtc_data[2]); + tm->tm_mday = bcd2bin(rtc_data[3]); + tm->tm_mon = bcd2bin(rtc_data[4]) - 1; + tm->tm_year = bcd2bin(rtc_data[5]) + 100; + tm->tm_wday = bcd2bin(rtc_data[6]); + + return 0; +} + +static int tps6594_rtc_set_time(struct device *dev, struct rtc_time *tm) +{ + unsigned char rtc_data[NUM_TIME_REGS]; + struct tps6594 *tps = dev_get_drvdata(dev->parent); + int ret; + + rtc_data[0] = bin2bcd(tm->tm_sec); + rtc_data[1] = bin2bcd(tm->tm_min); + rtc_data[2] = bin2bcd(tm->tm_hour); + rtc_data[3] = bin2bcd(tm->tm_mday); + rtc_data[4] = bin2bcd(tm->tm_mon + 1); + rtc_data[5] = bin2bcd(tm->tm_year - 100); + rtc_data[6] = bin2bcd(tm->tm_wday); + + // Stop RTC while updating the RTC time registers. + ret = regmap_clear_bits(tps->regmap, TPS6594_REG_RTC_CTRL_1, + TPS6594_BIT_STOP_RTC); + if (ret < 0) + return ret; + + // Update all the time registers in one shot. + ret = regmap_bulk_write(tps->regmap, TPS6594_REG_RTC_SECONDS, rtc_data, + NUM_TIME_REGS); + if (ret < 0) + return ret; + + // Start back RTC. 
+ return regmap_set_bits(tps->regmap, TPS6594_REG_RTC_CTRL_1, + TPS6594_BIT_STOP_RTC); +} + +static int tps6594_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm) +{ + unsigned char alarm_data[NUM_TIME_ALARM_REGS]; + u32 int_val; + struct tps6594 *tps = dev_get_drvdata(dev->parent); + int ret; + + ret = regmap_bulk_read(tps->regmap, TPS6594_REG_ALARM_SECONDS, + alarm_data, NUM_TIME_ALARM_REGS); + if (ret < 0) + return ret; + + alm->time.tm_sec = bcd2bin(alarm_data[0]); + alm->time.tm_min = bcd2bin(alarm_data[1]); + alm->time.tm_hour = bcd2bin(alarm_data[2]); + alm->time.tm_mday = bcd2bin(alarm_data[3]); + alm->time.tm_mon = bcd2bin(alarm_data[4]) - 1; + alm->time.tm_year = bcd2bin(alarm_data[5]) + 100; + + ret = regmap_read(tps->regmap, TPS6594_REG_RTC_INTERRUPTS, &int_val); + if (ret < 0) + return ret; + + alm->enabled = int_val & TPS6594_BIT_IT_ALARM; + + return 0; +} + +static int tps6594_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm) +{ + unsigned char alarm_data[NUM_TIME_ALARM_REGS]; + struct tps6594 *tps = dev_get_drvdata(dev->parent); + int ret; + + // Disable alarm irq before changing the alarm timestamp. + ret = tps6594_rtc_alarm_irq_enable(dev, 0); + if (ret) + return ret; + + alarm_data[0] = bin2bcd(alm->time.tm_sec); + alarm_data[1] = bin2bcd(alm->time.tm_min); + alarm_data[2] = bin2bcd(alm->time.tm_hour); + alarm_data[3] = bin2bcd(alm->time.tm_mday); + alarm_data[4] = bin2bcd(alm->time.tm_mon + 1); + alarm_data[5] = bin2bcd(alm->time.tm_year - 100); + + // Update all the alarm registers in one shot. 
+ ret = regmap_bulk_write(tps->regmap, TPS6594_REG_ALARM_SECONDS, + alarm_data, NUM_TIME_ALARM_REGS); + if (ret < 0) + return ret; + + if (alm->enabled) + ret = tps6594_rtc_alarm_irq_enable(dev, 1); + + return ret; +} + +static int tps6594_rtc_set_calibration(struct device *dev, int calibration) +{ + struct tps6594 *tps = dev_get_drvdata(dev->parent); + __le16 value; + int ret; + + /* + * TPS6594 uses two's complement 16 bit value for compensation of RTC + * crystal inaccuracies. One time every hour when seconds counter + * increments from 0 to 1 compensation value will be added to internal + * RTC counter value. + * + * Valid range for compensation value: [-32767 .. 32767]. + */ + if (calibration < S16_MIN + 1 || calibration > S16_MAX) + return -ERANGE; + + value = cpu_to_le16(calibration); + + // Update all the compensation registers in one shot. + ret = regmap_bulk_write(tps->regmap, TPS6594_REG_RTC_COMP_LSB, &value, + sizeof(value)); + if (ret < 0) + return ret; + + // Enable automatic compensation. + return regmap_set_bits(tps->regmap, TPS6594_REG_RTC_CTRL_1, + TPS6594_BIT_AUTO_COMP); +} + +static int tps6594_rtc_get_calibration(struct device *dev, int *calibration) +{ + struct tps6594 *tps = dev_get_drvdata(dev->parent); + unsigned int ctrl; + __le16 value; + int ret; + + ret = regmap_read(tps->regmap, TPS6594_REG_RTC_CTRL_1, &ctrl); + if (ret < 0) + return ret; + + // If automatic compensation is not enabled report back zero. + if (!(ctrl & TPS6594_BIT_AUTO_COMP)) { + *calibration = 0; + return 0; + } + + ret = regmap_bulk_read(tps->regmap, TPS6594_REG_RTC_COMP_LSB, &value, + sizeof(value)); + if (ret < 0) + return ret; + + *calibration = le16_to_cpu(value); + + return 0; +} + +static int tps6594_rtc_read_offset(struct device *dev, long *offset) +{ + int calibration; + s64 tmp; + int ret; + + ret = tps6594_rtc_get_calibration(dev, &calibration); + if (ret < 0) + return ret; + + // Convert from RTC calibration register format to ppb format. 
+ tmp = calibration * PPB_MULT; + + if (tmp < 0) + tmp -= TICKS_PER_HOUR / 2LL; + else + tmp += TICKS_PER_HOUR / 2LL; + tmp = div_s64(tmp, TICKS_PER_HOUR); + + /* + * SAFETY: + * Computatiion is the reverse operation of the one done in + * `tps6594_rtc_set_offset`. The safety remarks applie here too. + */ + + /* + * Offset value operates in negative way, so swap sign. + * See 8.3.10.5, (32768 - COMP_REG). + */ + *offset = (long)-tmp; + + return 0; +} + +static int tps6594_rtc_set_offset(struct device *dev, long offset) +{ + int calibration; + s64 tmp; + + // Make sure offset value is within supported range. + if (offset < MIN_OFFSET || offset > MAX_OFFSET) + return -ERANGE; + + // Convert from ppb format to RTC calibration register format. + + tmp = offset * TICKS_PER_HOUR; + if (tmp < 0) + tmp -= PPB_MULT / 2LL; + else + tmp += PPB_MULT / 2LL; + tmp = div_s64(tmp, PPB_MULT); + + /* + * SAFETY: + * - tmp = offset * TICK_PER_HOUR : + * `offset` can't be more than 277774, so `tmp` can't exceed 277774000000000 + * which is lower than the maximum value in an `s64` (2^63-1). No overflow here. + * + * - tmp += TICK_PER_HOUR / 2LL : + * tmp will have a maximum value of 277774117964800 which is still inferior to 2^63-1. + */ + + // Offset value operates in negative way, so swap sign. 
+ calibration = (int)-tmp; + + return tps6594_rtc_set_calibration(dev, calibration); +} + +static irqreturn_t tps6594_rtc_interrupt(int irq, void *rtc) +{ + struct device *dev = rtc; + struct tps6594 *tps = dev_get_drvdata(dev->parent); + struct rtc_device *rtc_dev = dev_get_drvdata(dev); + int ret; + u32 rtc_reg; + + ret = regmap_read(tps->regmap, TPS6594_REG_RTC_STATUS, &rtc_reg); + if (ret) + return IRQ_NONE; + + rtc_update_irq(rtc_dev, 1, RTC_IRQF | RTC_AF); + + return IRQ_HANDLED; +} + +static const struct rtc_class_ops tps6594_rtc_ops = { + .read_time = tps6594_rtc_read_time, + .set_time = tps6594_rtc_set_time, + .read_alarm = tps6594_rtc_read_alarm, + .set_alarm = tps6594_rtc_set_alarm, + .alarm_irq_enable = tps6594_rtc_alarm_irq_enable, + .read_offset = tps6594_rtc_read_offset, + .set_offset = tps6594_rtc_set_offset, +}; + +static int tps6594_rtc_probe(struct platform_device *pdev) +{ + struct tps6594 *tps = dev_get_drvdata(pdev->dev.parent); + struct device *dev = &pdev->dev; + struct rtc_device *rtc; + int irq; + int ret; + + rtc = devm_kzalloc(dev, sizeof(*rtc), GFP_KERNEL); + if (!rtc) + return -ENOMEM; + + rtc = devm_rtc_allocate_device(dev); + if (IS_ERR(rtc)) + return PTR_ERR(rtc); + + // Enable crystal oscillator. + ret = regmap_set_bits(tps->regmap, TPS6594_REG_RTC_CTRL_2, + TPS6594_BIT_XTAL_EN); + if (ret < 0) + return ret; + + ret = regmap_test_bits(tps->regmap, TPS6594_REG_RTC_STATUS, + TPS6594_BIT_RUN); + if (ret < 0) + return ret; + // RTC not running. + if (ret == 0) { + ret = regmap_set_bits(tps->regmap, TPS6594_REG_RTC_CTRL_1, + TPS6594_BIT_STOP_RTC); + if (ret < 0) + return ret; + + /* + * On some boards, a 40 ms delay is needed before BIT_RUN is set. + * 80 ms should provide sufficient margin. + */ + mdelay(80); + + /* + * RTC should be running now. Check if this is the case. + * If not it might be a missing oscillator. 
+ */ + ret = regmap_test_bits(tps->regmap, TPS6594_REG_RTC_STATUS, + TPS6594_BIT_RUN); + if (ret < 0) + return ret; + if (ret == 0) + return -ENODEV; + + // Stop RTC until first call to `tps6594_rtc_set_time`. + ret = regmap_clear_bits(tps->regmap, TPS6594_REG_RTC_CTRL_1, + TPS6594_BIT_STOP_RTC); + if (ret < 0) + return ret; + } + + platform_set_drvdata(pdev, rtc); + + irq = platform_get_irq_byname(pdev, TPS6594_IRQ_NAME_ALARM); + if (irq < 0) + return dev_err_probe(dev, irq, "Failed to get irq\n"); + + ret = devm_request_threaded_irq(dev, irq, NULL, tps6594_rtc_interrupt, + IRQF_ONESHOT, TPS6594_IRQ_NAME_ALARM, + dev); + if (ret < 0) + return dev_err_probe(dev, ret, + "Failed to request_threaded_irq\n"); + + ret = device_init_wakeup(dev, true); + if (ret < 0) + return dev_err_probe(dev, ret, + "Failed to init rtc as wakeup source\n"); + + rtc->ops = &tps6594_rtc_ops; + rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; + rtc->range_max = RTC_TIMESTAMP_END_2099; + + return devm_rtc_register_device(rtc); +} + +static const struct platform_device_id tps6594_rtc_id_table[] = { + { "tps6594-rtc", }, + {} +}; +MODULE_DEVICE_TABLE(platform, tps6594_rtc_id_table); + +static struct platform_driver tps6594_rtc_driver = { + .probe = tps6594_rtc_probe, + .driver = { + .name = "tps6594-rtc", + }, + .id_table = tps6594_rtc_id_table, +}; + +module_platform_driver(tps6594_rtc_driver); +MODULE_AUTHOR("Esteban Blanc "); +MODULE_DESCRIPTION("TPS6594 RTC driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index acb710d3d7bcd52261ea5b36b8a2be82f72f2926..983b3b16196c6b7638e023f121e5c937c83ce150 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -32,7 +32,8 @@ #define AP_RESET_INTERVAL 20 /* Reset sleep interval (20ms) */ -static int vfio_ap_mdev_reset_queues(struct ap_queue_table *qtable); +static int vfio_ap_mdev_reset_queues(struct ap_matrix_mdev *matrix_mdev); +static int 
vfio_ap_mdev_reset_qlist(struct list_head *qlist); static struct vfio_ap_queue *vfio_ap_find_queue(int apqn); static const struct vfio_device_ops vfio_ap_matrix_dev_ops; static void vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q); @@ -665,17 +666,23 @@ static bool vfio_ap_mdev_filter_cdoms(struct ap_matrix_mdev *matrix_mdev) * device driver. * * @matrix_mdev: the matrix mdev whose matrix is to be filtered. + * @apm_filtered: a 256-bit bitmap for storing the APIDs filtered from the + * guest's AP configuration that are still in the host's AP + * configuration. * * Note: If an APQN referencing a queue device that is not bound to the vfio_ap * driver, its APID will be filtered from the guest's APCB. The matrix * structure precludes filtering an individual APQN, so its APID will be - * filtered. + * filtered. Consequently, all queues associated with the adapter that + * are in the host's AP configuration must be reset. If queues are + * subsequently made available again to the guest, they should re-appear + * in a reset state * * Return: a boolean value indicating whether the KVM guest's APCB was changed * by the filtering or not. 
*/ -static bool vfio_ap_mdev_filter_matrix(unsigned long *apm, unsigned long *aqm, - struct ap_matrix_mdev *matrix_mdev) +static bool vfio_ap_mdev_filter_matrix(struct ap_matrix_mdev *matrix_mdev, + unsigned long *apm_filtered) { unsigned long apid, apqi, apqn; DECLARE_BITMAP(prev_shadow_apm, AP_DEVICES); @@ -685,6 +692,7 @@ static bool vfio_ap_mdev_filter_matrix(unsigned long *apm, unsigned long *aqm, bitmap_copy(prev_shadow_apm, matrix_mdev->shadow_apcb.apm, AP_DEVICES); bitmap_copy(prev_shadow_aqm, matrix_mdev->shadow_apcb.aqm, AP_DOMAINS); vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->shadow_apcb); + bitmap_clear(apm_filtered, 0, AP_DEVICES); /* * Copy the adapters, domains and control domains to the shadow_apcb @@ -696,8 +704,9 @@ static bool vfio_ap_mdev_filter_matrix(unsigned long *apm, unsigned long *aqm, bitmap_and(matrix_mdev->shadow_apcb.aqm, matrix_mdev->matrix.aqm, (unsigned long *)matrix_dev->info.aqm, AP_DOMAINS); - for_each_set_bit_inv(apid, apm, AP_DEVICES) { - for_each_set_bit_inv(apqi, aqm, AP_DOMAINS) { + for_each_set_bit_inv(apid, matrix_mdev->shadow_apcb.apm, AP_DEVICES) { + for_each_set_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm, + AP_DOMAINS) { /* * If the APQN is not bound to the vfio_ap device * driver, then we can't assign it to the guest's @@ -709,8 +718,16 @@ static bool vfio_ap_mdev_filter_matrix(unsigned long *apm, unsigned long *aqm, apqn = AP_MKQID(apid, apqi); q = vfio_ap_mdev_get_queue(matrix_mdev, apqn); if (!q || q->reset_status.response_code) { - clear_bit_inv(apid, - matrix_mdev->shadow_apcb.apm); + clear_bit_inv(apid, matrix_mdev->shadow_apcb.apm); + + /* + * If the adapter was previously plugged into + * the guest, let's let the caller know that + * the APID was filtered. 
+ */ + if (test_bit_inv(apid, prev_shadow_apm)) + set_bit_inv(apid, apm_filtered); + break; } } @@ -812,7 +829,7 @@ static void vfio_ap_mdev_remove(struct mdev_device *mdev) mutex_lock(&matrix_dev->guests_lock); mutex_lock(&matrix_dev->mdevs_lock); - vfio_ap_mdev_reset_queues(&matrix_mdev->qtable); + vfio_ap_mdev_reset_queues(matrix_mdev); vfio_ap_mdev_unlink_fr_queues(matrix_mdev); list_del(&matrix_mdev->node); mutex_unlock(&matrix_dev->mdevs_lock); @@ -922,6 +939,47 @@ static void vfio_ap_mdev_link_adapter(struct ap_matrix_mdev *matrix_mdev, AP_MKQID(apid, apqi)); } +static void collect_queues_to_reset(struct ap_matrix_mdev *matrix_mdev, + unsigned long apid, + struct list_head *qlist) +{ + struct vfio_ap_queue *q; + unsigned long apqi; + + for_each_set_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm, AP_DOMAINS) { + q = vfio_ap_mdev_get_queue(matrix_mdev, AP_MKQID(apid, apqi)); + if (q) + list_add_tail(&q->reset_qnode, qlist); + } +} + +static void reset_queues_for_apid(struct ap_matrix_mdev *matrix_mdev, + unsigned long apid) +{ + struct list_head qlist; + + INIT_LIST_HEAD(&qlist); + collect_queues_to_reset(matrix_mdev, apid, &qlist); + vfio_ap_mdev_reset_qlist(&qlist); +} + +static int reset_queues_for_apids(struct ap_matrix_mdev *matrix_mdev, + unsigned long *apm_reset) +{ + struct list_head qlist; + unsigned long apid; + + if (bitmap_empty(apm_reset, AP_DEVICES)) + return 0; + + INIT_LIST_HEAD(&qlist); + + for_each_set_bit_inv(apid, apm_reset, AP_DEVICES) + collect_queues_to_reset(matrix_mdev, apid, &qlist); + + return vfio_ap_mdev_reset_qlist(&qlist); +} + /** * assign_adapter_store - parses the APID from @buf and sets the * corresponding bit in the mediated matrix device's APM @@ -962,7 +1020,7 @@ static ssize_t assign_adapter_store(struct device *dev, { int ret; unsigned long apid; - DECLARE_BITMAP(apm_delta, AP_DEVICES); + DECLARE_BITMAP(apm_filtered, AP_DEVICES); struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); mutex_lock(&ap_perms_mutex); @@ 
-991,12 +1049,11 @@ static ssize_t assign_adapter_store(struct device *dev, } vfio_ap_mdev_link_adapter(matrix_mdev, apid); - memset(apm_delta, 0, sizeof(apm_delta)); - set_bit_inv(apid, apm_delta); - if (vfio_ap_mdev_filter_matrix(apm_delta, - matrix_mdev->matrix.aqm, matrix_mdev)) + if (vfio_ap_mdev_filter_matrix(matrix_mdev, apm_filtered)) { vfio_ap_mdev_update_guest_apcb(matrix_mdev); + reset_queues_for_apids(matrix_mdev, apm_filtered); + } ret = count; done: @@ -1027,11 +1084,12 @@ static struct vfio_ap_queue * adapter was assigned. * @matrix_mdev: the matrix mediated device to which the adapter was assigned. * @apid: the APID of the unassigned adapter. - * @qtable: table for storing queues associated with unassigned adapter. + * @qlist: list for storing queues associated with unassigned adapter that + * need to be reset. */ static void vfio_ap_mdev_unlink_adapter(struct ap_matrix_mdev *matrix_mdev, unsigned long apid, - struct ap_queue_table *qtable) + struct list_head *qlist) { unsigned long apqi; struct vfio_ap_queue *q; @@ -1039,11 +1097,10 @@ static void vfio_ap_mdev_unlink_adapter(struct ap_matrix_mdev *matrix_mdev, for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, AP_DOMAINS) { q = vfio_ap_unlink_apqn_fr_mdev(matrix_mdev, apid, apqi); - if (q && qtable) { + if (q && qlist) { if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm) && test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm)) - hash_add(qtable->queues, &q->mdev_qnode, - q->apqn); + list_add_tail(&q->reset_qnode, qlist); } } } @@ -1051,26 +1108,23 @@ static void vfio_ap_mdev_unlink_adapter(struct ap_matrix_mdev *matrix_mdev, static void vfio_ap_mdev_hot_unplug_adapter(struct ap_matrix_mdev *matrix_mdev, unsigned long apid) { - int loop_cursor; - struct vfio_ap_queue *q; - struct ap_queue_table *qtable = kzalloc(sizeof(*qtable), GFP_KERNEL); + struct vfio_ap_queue *q, *tmpq; + struct list_head qlist; - hash_init(qtable->queues); - vfio_ap_mdev_unlink_adapter(matrix_mdev, apid, qtable); + 
INIT_LIST_HEAD(&qlist); + vfio_ap_mdev_unlink_adapter(matrix_mdev, apid, &qlist); if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm)) { clear_bit_inv(apid, matrix_mdev->shadow_apcb.apm); vfio_ap_mdev_update_guest_apcb(matrix_mdev); } - vfio_ap_mdev_reset_queues(qtable); + vfio_ap_mdev_reset_qlist(&qlist); - hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) { + list_for_each_entry_safe(q, tmpq, &qlist, reset_qnode) { vfio_ap_unlink_mdev_fr_queue(q); - hash_del(&q->mdev_qnode); + list_del(&q->reset_qnode); } - - kfree(qtable); } /** @@ -1171,7 +1225,7 @@ static ssize_t assign_domain_store(struct device *dev, { int ret; unsigned long apqi; - DECLARE_BITMAP(aqm_delta, AP_DOMAINS); + DECLARE_BITMAP(apm_filtered, AP_DEVICES); struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); mutex_lock(&ap_perms_mutex); @@ -1200,12 +1254,11 @@ static ssize_t assign_domain_store(struct device *dev, } vfio_ap_mdev_link_domain(matrix_mdev, apqi); - memset(aqm_delta, 0, sizeof(aqm_delta)); - set_bit_inv(apqi, aqm_delta); - if (vfio_ap_mdev_filter_matrix(matrix_mdev->matrix.apm, aqm_delta, - matrix_mdev)) + if (vfio_ap_mdev_filter_matrix(matrix_mdev, apm_filtered)) { vfio_ap_mdev_update_guest_apcb(matrix_mdev); + reset_queues_for_apids(matrix_mdev, apm_filtered); + } ret = count; done: @@ -1218,7 +1271,7 @@ static DEVICE_ATTR_WO(assign_domain); static void vfio_ap_mdev_unlink_domain(struct ap_matrix_mdev *matrix_mdev, unsigned long apqi, - struct ap_queue_table *qtable) + struct list_head *qlist) { unsigned long apid; struct vfio_ap_queue *q; @@ -1226,11 +1279,10 @@ static void vfio_ap_mdev_unlink_domain(struct ap_matrix_mdev *matrix_mdev, for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, AP_DEVICES) { q = vfio_ap_unlink_apqn_fr_mdev(matrix_mdev, apid, apqi); - if (q && qtable) { + if (q && qlist) { if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm) && test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm)) - hash_add(qtable->queues, &q->mdev_qnode, - q->apqn); + 
list_add_tail(&q->reset_qnode, qlist); } } } @@ -1238,26 +1290,23 @@ static void vfio_ap_mdev_unlink_domain(struct ap_matrix_mdev *matrix_mdev, static void vfio_ap_mdev_hot_unplug_domain(struct ap_matrix_mdev *matrix_mdev, unsigned long apqi) { - int loop_cursor; - struct vfio_ap_queue *q; - struct ap_queue_table *qtable = kzalloc(sizeof(*qtable), GFP_KERNEL); + struct vfio_ap_queue *q, *tmpq; + struct list_head qlist; - hash_init(qtable->queues); - vfio_ap_mdev_unlink_domain(matrix_mdev, apqi, qtable); + INIT_LIST_HEAD(&qlist); + vfio_ap_mdev_unlink_domain(matrix_mdev, apqi, &qlist); if (test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm)) { clear_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm); vfio_ap_mdev_update_guest_apcb(matrix_mdev); } - vfio_ap_mdev_reset_queues(qtable); + vfio_ap_mdev_reset_qlist(&qlist); - hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) { + list_for_each_entry_safe(q, tmpq, &qlist, reset_qnode) { vfio_ap_unlink_mdev_fr_queue(q); - hash_del(&q->mdev_qnode); + list_del(&q->reset_qnode); } - - kfree(qtable); } /** @@ -1612,7 +1661,7 @@ static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev) get_update_locks_for_kvm(kvm); kvm_arch_crypto_clear_masks(kvm); - vfio_ap_mdev_reset_queues(&matrix_mdev->qtable); + vfio_ap_mdev_reset_queues(matrix_mdev); kvm_put_kvm(kvm); matrix_mdev->kvm = NULL; @@ -1748,15 +1797,33 @@ static void vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q) } } -static int vfio_ap_mdev_reset_queues(struct ap_queue_table *qtable) +static int vfio_ap_mdev_reset_queues(struct ap_matrix_mdev *matrix_mdev) { int ret = 0, loop_cursor; struct vfio_ap_queue *q; - hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) + hash_for_each(matrix_mdev->qtable.queues, loop_cursor, q, mdev_qnode) vfio_ap_mdev_reset_queue(q); - hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) { + hash_for_each(matrix_mdev->qtable.queues, loop_cursor, q, mdev_qnode) { + flush_work(&q->reset_work); + + if 
(q->reset_status.response_code) + ret = -EIO; + } + + return ret; +} + +static int vfio_ap_mdev_reset_qlist(struct list_head *qlist) +{ + int ret = 0; + struct vfio_ap_queue *q; + + list_for_each_entry(q, qlist, reset_qnode) + vfio_ap_mdev_reset_queue(q); + + list_for_each_entry(q, qlist, reset_qnode) { flush_work(&q->reset_work); if (q->reset_status.response_code) @@ -1942,7 +2009,7 @@ static ssize_t vfio_ap_mdev_ioctl(struct vfio_device *vdev, ret = vfio_ap_mdev_get_device_info(arg); break; case VFIO_DEVICE_RESET: - ret = vfio_ap_mdev_reset_queues(&matrix_mdev->qtable); + ret = vfio_ap_mdev_reset_queues(matrix_mdev); break; case VFIO_DEVICE_GET_IRQ_INFO: ret = vfio_ap_get_irq_info(arg); @@ -2088,6 +2155,7 @@ int vfio_ap_mdev_probe_queue(struct ap_device *apdev) { int ret; struct vfio_ap_queue *q; + DECLARE_BITMAP(apm_filtered, AP_DEVICES); struct ap_matrix_mdev *matrix_mdev; ret = sysfs_create_group(&apdev->device.kobj, &vfio_queue_attr_group); @@ -2109,15 +2177,28 @@ int vfio_ap_mdev_probe_queue(struct ap_device *apdev) if (matrix_mdev) { vfio_ap_mdev_link_queue(matrix_mdev, q); - if (vfio_ap_mdev_filter_matrix(matrix_mdev->matrix.apm, - matrix_mdev->matrix.aqm, - matrix_mdev)) + /* + * If we're in the process of handling the adding of adapters or + * domains to the host's AP configuration, then let the + * vfio_ap device driver's on_scan_complete callback filter the + * matrix and update the guest's AP configuration after all of + * the new queue devices are probed. 
+ */ + if (!bitmap_empty(matrix_mdev->apm_add, AP_DEVICES) || + !bitmap_empty(matrix_mdev->aqm_add, AP_DOMAINS)) + goto done; + + if (vfio_ap_mdev_filter_matrix(matrix_mdev, apm_filtered)) { vfio_ap_mdev_update_guest_apcb(matrix_mdev); + reset_queues_for_apids(matrix_mdev, apm_filtered); + } } + +done: dev_set_drvdata(&apdev->device, q); release_update_locks_for_mdev(matrix_mdev); - return 0; + return ret; err_remove_group: sysfs_remove_group(&apdev->device.kobj, &vfio_queue_attr_group); @@ -2134,26 +2215,40 @@ void vfio_ap_mdev_remove_queue(struct ap_device *apdev) q = dev_get_drvdata(&apdev->device); get_update_locks_for_queue(q); matrix_mdev = q->matrix_mdev; + apid = AP_QID_CARD(q->apqn); + apqi = AP_QID_QUEUE(q->apqn); if (matrix_mdev) { - vfio_ap_unlink_queue_fr_mdev(q); - - apid = AP_QID_CARD(q->apqn); - apqi = AP_QID_QUEUE(q->apqn); - - /* - * If the queue is assigned to the guest's APCB, then remove - * the adapter's APID from the APCB and hot it into the guest. - */ + /* If the queue is assigned to the guest's AP configuration */ if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm) && test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm)) { + /* + * Since the queues are defined via a matrix of adapters + * and domains, it is not possible to hot unplug a + * single queue; so, let's unplug the adapter. + */ clear_bit_inv(apid, matrix_mdev->shadow_apcb.apm); vfio_ap_mdev_update_guest_apcb(matrix_mdev); + reset_queues_for_apid(matrix_mdev, apid); + goto done; } } - vfio_ap_mdev_reset_queue(q); - flush_work(&q->reset_work); + /* + * If the queue is not in the host's AP configuration, then resetting + * it will fail with response code 01, (APQN not valid); so, let's make + * sure it is in the host's config. 
+ */ + if (test_bit_inv(apid, (unsigned long *)matrix_dev->info.apm) && + test_bit_inv(apqi, (unsigned long *)matrix_dev->info.aqm)) { + vfio_ap_mdev_reset_queue(q); + flush_work(&q->reset_work); + } + +done: + if (matrix_mdev) + vfio_ap_unlink_queue_fr_mdev(q); + dev_set_drvdata(&apdev->device, NULL); kfree(q); release_update_locks_for_mdev(matrix_mdev); @@ -2461,39 +2556,30 @@ void vfio_ap_on_cfg_changed(struct ap_config_info *cur_cfg_info, static void vfio_ap_mdev_hot_plug_cfg(struct ap_matrix_mdev *matrix_mdev) { - bool do_hotplug = false; - int filter_domains = 0; - int filter_adapters = 0; - DECLARE_BITMAP(apm, AP_DEVICES); - DECLARE_BITMAP(aqm, AP_DOMAINS); + DECLARE_BITMAP(apm_filtered, AP_DEVICES); + bool filter_domains, filter_adapters, filter_cdoms, do_hotplug = false; mutex_lock(&matrix_mdev->kvm->lock); mutex_lock(&matrix_dev->mdevs_lock); - filter_adapters = bitmap_and(apm, matrix_mdev->matrix.apm, - matrix_mdev->apm_add, AP_DEVICES); - filter_domains = bitmap_and(aqm, matrix_mdev->matrix.aqm, - matrix_mdev->aqm_add, AP_DOMAINS); - - if (filter_adapters && filter_domains) - do_hotplug |= vfio_ap_mdev_filter_matrix(apm, aqm, matrix_mdev); - else if (filter_adapters) - do_hotplug |= - vfio_ap_mdev_filter_matrix(apm, - matrix_mdev->shadow_apcb.aqm, - matrix_mdev); - else - do_hotplug |= - vfio_ap_mdev_filter_matrix(matrix_mdev->shadow_apcb.apm, - aqm, matrix_mdev); + filter_adapters = bitmap_intersects(matrix_mdev->matrix.apm, + matrix_mdev->apm_add, AP_DEVICES); + filter_domains = bitmap_intersects(matrix_mdev->matrix.aqm, + matrix_mdev->aqm_add, AP_DOMAINS); + filter_cdoms = bitmap_intersects(matrix_mdev->matrix.adm, + matrix_mdev->adm_add, AP_DOMAINS); - if (bitmap_intersects(matrix_mdev->matrix.adm, matrix_mdev->adm_add, - AP_DOMAINS)) + if (filter_adapters || filter_domains) + do_hotplug = vfio_ap_mdev_filter_matrix(matrix_mdev, apm_filtered); + + if (filter_cdoms) do_hotplug |= vfio_ap_mdev_filter_cdoms(matrix_mdev); if (do_hotplug) 
vfio_ap_mdev_update_guest_apcb(matrix_mdev); + reset_queues_for_apids(matrix_mdev, apm_filtered); + mutex_unlock(&matrix_dev->mdevs_lock); mutex_unlock(&matrix_mdev->kvm->lock); } diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h index 88aff8b81f2fc664594ea3e2b354d03545e4e19c..98d37aa27044a643825e0632bfe2b445fcd717b1 100644 --- a/drivers/s390/crypto/vfio_ap_private.h +++ b/drivers/s390/crypto/vfio_ap_private.h @@ -133,6 +133,8 @@ struct ap_matrix_mdev { * @apqn: the APQN of the AP queue device * @saved_isc: the guest ISC registered with the GIB interface * @mdev_qnode: allows the vfio_ap_queue struct to be added to a hashtable + * @reset_qnode: allows the vfio_ap_queue struct to be added to a list of queues + * that need to be reset * @reset_status: the status from the last reset of the queue * @reset_work: work to wait for queue reset to complete */ @@ -143,6 +145,7 @@ struct vfio_ap_queue { #define VFIO_AP_ISC_INVALID 0xff unsigned char saved_isc; struct hlist_node mdev_qnode; + struct list_head reset_qnode; struct ap_queue_status reset_status; struct work_struct reset_work; }; diff --git a/drivers/scsi/fcoe/fcoe_sysfs.c b/drivers/scsi/fcoe/fcoe_sysfs.c index 408a806bf4c2d3c23a07ca0f45dbe71d6d741965..c64a085a7ee2f9f29c0e9fe1e00bed7e8eedf5a1 100644 --- a/drivers/scsi/fcoe/fcoe_sysfs.c +++ b/drivers/scsi/fcoe/fcoe_sysfs.c @@ -263,6 +263,7 @@ static ssize_t store_ctlr_mode(struct device *dev, const char *buf, size_t count) { struct fcoe_ctlr_device *ctlr = dev_to_ctlr(dev); + int res; if (count > FCOE_MAX_MODENAME_LEN) return -EINVAL; @@ -279,12 +280,13 @@ static ssize_t store_ctlr_mode(struct device *dev, return -ENOTSUPP; } - ctlr->mode = sysfs_match_string(fip_conn_type_names, buf); - if (ctlr->mode < 0 || ctlr->mode == FIP_CONN_TYPE_UNKNOWN) { + res = sysfs_match_string(fip_conn_type_names, buf); + if (res < 0 || res == FIP_CONN_TYPE_UNKNOWN) { LIBFCOE_SYSFS_DBG(ctlr, "Unknown mode %s provided.\n", buf); return -EINVAL; } 
+ ctlr->mode = res; ctlr->f->set_fcoe_ctlr_mode(ctlr); LIBFCOE_SYSFS_DBG(ctlr, "Mode changed to %s.\n", buf); diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c index 4d6db4509e755dcfc66803ff929e36140d8e8373..8d7fc5284293b5283523b049ba38387857ebb09e 100644 --- a/drivers/scsi/fnic/fnic_scsi.c +++ b/drivers/scsi/fnic/fnic_scsi.c @@ -546,6 +546,7 @@ int fnic_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *sc) if (fnic->sw_copy_wq[hwq].io_req_table[blk_mq_unique_tag_to_tag(mqtag)] != NULL) { WARN(1, "fnic<%d>: %s: hwq: %d tag 0x%x already exists\n", fnic->fnic_num, __func__, hwq, blk_mq_unique_tag_to_tag(mqtag)); + spin_unlock_irqrestore(&fnic->wq_copy_lock[hwq], flags); return SCSI_MLQUEUE_HOST_BUSY; } diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c index d8c57a0a518f4c42aa0ff60417052dc871a431a7..528f19f782f2156d956a618eddb80e00f1faf728 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_fw.c +++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c @@ -475,7 +475,7 @@ int mpi3mr_process_admin_reply_q(struct mpi3mr_ioc *mrioc) * @op_reply_q: op_reply_qinfo object * @reply_ci: operational reply descriptor's queue consumer index * - * Returns reply descriptor frame address + * Returns: reply descriptor frame address */ static inline struct mpi3_default_reply_descriptor * mpi3mr_get_reply_desc(struct op_reply_qinfo *op_reply_q, u32 reply_ci) @@ -1063,7 +1063,6 @@ enum mpi3mr_iocstate mpi3mr_get_iocstate(struct mpi3mr_ioc *mrioc) * @mrioc: Adapter instance reference * * Free the DMA memory allocated for IOCTL handling purpose. - * * Return: None */ @@ -1106,7 +1105,6 @@ static void mpi3mr_free_ioctl_dma_memory(struct mpi3mr_ioc *mrioc) /** * mpi3mr_alloc_ioctl_dma_memory - Alloc memory for ioctl dma * @mrioc: Adapter instance reference - * * This function allocates dmaable memory required to handle the * application issued MPI3 IOCTL requests. 
@@ -1241,7 +1239,7 @@ static int mpi3mr_issue_and_process_mur(struct mpi3mr_ioc *mrioc, * during reset/resume * @mrioc: Adapter instance reference * - * Return zero if the new IOCFacts parameters value is compatible with + * Return: zero if the new IOCFacts parameters value is compatible with * older values else return -EPERM */ static int diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 03348f605c2e9a5289082fbd2c1694c90229cf16..dd674378f2f392216334f7adb90129f2753e8c2a 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -2889,7 +2889,7 @@ static void qla2x00_iocb_work_fn(struct work_struct *work) static void qla_trace_init(void) { - qla_trc_array = trace_array_get_by_name("qla2xxx"); + qla_trc_array = trace_array_get_by_name("qla2xxx", NULL); if (!qla_trc_array) { ql_log(ql_log_fatal, NULL, 0x0001, "Unable to create qla2xxx trace instance, instance logging will be disabled.\n"); diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 885a7d5df3b9daa26bd0b42a428d7cb40ceae7d2..79da4b1c1df0adc649954a45f2d630989f12a6d6 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -2197,15 +2197,18 @@ void scsi_eh_flush_done_q(struct list_head *done_q) struct scsi_cmnd *scmd, *next; list_for_each_entry_safe(scmd, next, done_q, eh_entry) { + struct scsi_device *sdev = scmd->device; + list_del_init(&scmd->eh_entry); - if (scsi_device_online(scmd->device) && - !scsi_noretry_cmd(scmd) && scsi_cmd_retry_allowed(scmd) && - scsi_eh_should_retry_cmd(scmd)) { + if (scsi_device_online(sdev) && !scsi_noretry_cmd(scmd) && + scsi_cmd_retry_allowed(scmd) && + scsi_eh_should_retry_cmd(scmd)) { SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_INFO, scmd, "%s: flush retry cmd\n", current->comm)); scsi_queue_insert(scmd, SCSI_MLQUEUE_EH_RETRY); + blk_mq_kick_requeue_list(sdev->request_queue); } else { /* * If just we got sense for the device (called diff --git a/drivers/scsi/smartpqi/smartpqi.h 
b/drivers/scsi/smartpqi/smartpqi.h index 041940183516969318839f297583f16ab98b7b71..cdedc271857aae82ef5e25ae2506c4079c6bd6ca 100644 --- a/drivers/scsi/smartpqi/smartpqi.h +++ b/drivers/scsi/smartpqi/smartpqi.h @@ -1347,7 +1347,6 @@ struct pqi_ctrl_info { bool controller_online; bool block_requests; bool scan_blocked; - u8 logical_volume_rescan_needed : 1; u8 inbound_spanning_supported : 1; u8 outbound_spanning_supported : 1; u8 pqi_mode_enabled : 1; diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index 9a58df9312fa7e4151ca07537391b030d91a2289..ceff1ec13f9ea9ea056da947d3939c51f4797522 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -33,11 +33,11 @@ #define BUILD_TIMESTAMP #endif -#define DRIVER_VERSION "2.1.24-046" +#define DRIVER_VERSION "2.1.26-030" #define DRIVER_MAJOR 2 #define DRIVER_MINOR 1 -#define DRIVER_RELEASE 24 -#define DRIVER_REVISION 46 +#define DRIVER_RELEASE 26 +#define DRIVER_REVISION 30 #define DRIVER_NAME "Microchip SmartPQI Driver (v" \ DRIVER_VERSION BUILD_TIMESTAMP ")" @@ -2093,8 +2093,6 @@ static void pqi_scsi_update_device(struct pqi_ctrl_info *ctrl_info, if (existing_device->devtype == TYPE_DISK) { existing_device->raid_level = new_device->raid_level; existing_device->volume_status = new_device->volume_status; - if (ctrl_info->logical_volume_rescan_needed) - existing_device->rescan = true; memset(existing_device->next_bypass_group, 0, sizeof(existing_device->next_bypass_group)); if (!pqi_raid_maps_equal(existing_device->raid_map, new_device->raid_map)) { kfree(existing_device->raid_map); @@ -2164,6 +2162,20 @@ static inline void pqi_init_device_tmf_work(struct pqi_scsi_dev *device) INIT_WORK(&tmf_work->work_struct, pqi_tmf_worker); } +static inline bool pqi_volume_rescan_needed(struct pqi_scsi_dev *device) +{ + if (pqi_device_in_remove(device)) + return false; + + if (device->sdev == NULL) + return false; + + if (!scsi_device_online(device->sdev)) + 
return false; + + return device->rescan; +} + static void pqi_update_device_list(struct pqi_ctrl_info *ctrl_info, struct pqi_scsi_dev *new_device_list[], unsigned int num_new_devices) { @@ -2284,9 +2296,13 @@ static void pqi_update_device_list(struct pqi_ctrl_info *ctrl_info, if (device->sdev && device->queue_depth != device->advertised_queue_depth) { device->advertised_queue_depth = device->queue_depth; scsi_change_queue_depth(device->sdev, device->advertised_queue_depth); - if (device->rescan) { - scsi_rescan_device(device->sdev); + spin_lock_irqsave(&ctrl_info->scsi_device_list_lock, flags); + if (pqi_volume_rescan_needed(device)) { device->rescan = false; + spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags); + scsi_rescan_device(device->sdev); + } else { + spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags); } } } @@ -2308,8 +2324,6 @@ static void pqi_update_device_list(struct pqi_ctrl_info *ctrl_info, } } - ctrl_info->logical_volume_rescan_needed = false; - } static inline bool pqi_is_supported_device(struct pqi_scsi_dev *device) @@ -3702,6 +3716,21 @@ static bool pqi_ofa_process_event(struct pqi_ctrl_info *ctrl_info, return ack_event; } +static void pqi_mark_volumes_for_rescan(struct pqi_ctrl_info *ctrl_info) +{ + unsigned long flags; + struct pqi_scsi_dev *device; + + spin_lock_irqsave(&ctrl_info->scsi_device_list_lock, flags); + + list_for_each_entry(device, &ctrl_info->scsi_device_list, scsi_device_list_entry) { + if (pqi_is_logical_device(device) && device->devtype == TYPE_DISK) + device->rescan = true; + } + + spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags); +} + static void pqi_disable_raid_bypass(struct pqi_ctrl_info *ctrl_info) { unsigned long flags; @@ -3742,7 +3771,7 @@ static void pqi_event_worker(struct work_struct *work) ack_event = true; rescan_needed = true; if (event->event_type == PQI_EVENT_TYPE_LOGICAL_DEVICE) - ctrl_info->logical_volume_rescan_needed = true; + 
pqi_mark_volumes_for_rescan(ctrl_info); else if (event->event_type == PQI_EVENT_TYPE_AIO_STATE_CHANGE) pqi_disable_raid_bypass(ctrl_info); } @@ -10142,6 +10171,18 @@ static const struct pci_device_id pqi_pci_id_table[] = { PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, 0x1014, 0x0718) }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x1137, 0x02f8) + }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x1137, 0x02f9) + }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x1137, 0x02fa) + }, { PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, 0x1e93, 0x1000) @@ -10198,6 +10239,34 @@ static const struct pci_device_id pqi_pci_id_table[] = { PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, 0x1f51, 0x100a) }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x1f51, 0x100e) + }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x1f51, 0x100f) + }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x1f51, 0x1010) + }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x1f51, 0x1011) + }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x1f51, 0x1043) + }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x1f51, 0x1044) + }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x1f51, 0x1045) + }, { PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, PCI_ANY_ID, PCI_ANY_ID) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 9d1bdcdc13312988b2ee052c3736bb548f21ffba..4cf20be668a6021c6acfae56c19f0914586a7bf6 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -37,6 +37,11 @@ #define VIRTIO_SCSI_EVENT_LEN 8 #define VIRTIO_SCSI_VQ_BASE 2 +static unsigned int virtscsi_poll_queues; +module_param(virtscsi_poll_queues, uint, 0644); +MODULE_PARM_DESC(virtscsi_poll_queues, + "The number of dedicated virtqueues for polling I/O"); + /* Command queue element */ struct virtio_scsi_cmd { struct scsi_cmnd *sc; @@ -76,6 +81,7 @@ struct virtio_scsi { struct virtio_scsi_event_node 
event_list[VIRTIO_SCSI_EVENT_LEN]; u32 num_queues; + int io_queues[HCTX_MAX_TYPES]; struct hlist_node node; @@ -722,9 +728,49 @@ static int virtscsi_abort(struct scsi_cmnd *sc) static void virtscsi_map_queues(struct Scsi_Host *shost) { struct virtio_scsi *vscsi = shost_priv(shost); - struct blk_mq_queue_map *qmap = &shost->tag_set.map[HCTX_TYPE_DEFAULT]; + int i, qoff; + + for (i = 0, qoff = 0; i < shost->nr_maps; i++) { + struct blk_mq_queue_map *map = &shost->tag_set.map[i]; + + map->nr_queues = vscsi->io_queues[i]; + map->queue_offset = qoff; + qoff += map->nr_queues; + + if (map->nr_queues == 0) + continue; + + /* + * Regular queues have interrupts and hence CPU affinity is + * defined by the core virtio code, but polling queues have + * no interrupts so we let the block layer assign CPU affinity. + */ + if (i == HCTX_TYPE_POLL) + blk_mq_map_queues(map); + else + blk_mq_virtio_map_queues(map, vscsi->vdev, 2); + } +} + +static int virtscsi_mq_poll(struct Scsi_Host *shost, unsigned int queue_num) +{ + struct virtio_scsi *vscsi = shost_priv(shost); + struct virtio_scsi_vq *virtscsi_vq = &vscsi->req_vqs[queue_num]; + unsigned long flags; + unsigned int len; + int found = 0; + void *buf; + + spin_lock_irqsave(&virtscsi_vq->vq_lock, flags); + + while ((buf = virtqueue_get_buf(virtscsi_vq->vq, &len)) != NULL) { + virtscsi_complete_cmd(vscsi, buf); + found++; + } + + spin_unlock_irqrestore(&virtscsi_vq->vq_lock, flags); - blk_mq_virtio_map_queues(qmap, vscsi->vdev, 2); + return found; } static void virtscsi_commit_rqs(struct Scsi_Host *shost, u16 hwq) @@ -751,6 +797,7 @@ static const struct scsi_host_template virtscsi_host_template = { .this_id = -1, .cmd_size = sizeof(struct virtio_scsi_cmd), .queuecommand = virtscsi_queuecommand, + .mq_poll = virtscsi_mq_poll, .commit_rqs = virtscsi_commit_rqs, .change_queue_depth = virtscsi_change_queue_depth, .eh_abort_handler = virtscsi_abort, @@ -795,13 +842,14 @@ static int virtscsi_init(struct virtio_device *vdev, { int err; 
u32 i; - u32 num_vqs; + u32 num_vqs, num_poll_vqs, num_req_vqs; vq_callback_t **callbacks; const char **names; struct virtqueue **vqs; struct irq_affinity desc = { .pre_vectors = 2 }; - num_vqs = vscsi->num_queues + VIRTIO_SCSI_VQ_BASE; + num_req_vqs = vscsi->num_queues; + num_vqs = num_req_vqs + VIRTIO_SCSI_VQ_BASE; vqs = kmalloc_array(num_vqs, sizeof(struct virtqueue *), GFP_KERNEL); callbacks = kmalloc_array(num_vqs, sizeof(vq_callback_t *), GFP_KERNEL); @@ -812,15 +860,31 @@ static int virtscsi_init(struct virtio_device *vdev, goto out; } + num_poll_vqs = min_t(unsigned int, virtscsi_poll_queues, + num_req_vqs - 1); + vscsi->io_queues[HCTX_TYPE_DEFAULT] = num_req_vqs - num_poll_vqs; + vscsi->io_queues[HCTX_TYPE_READ] = 0; + vscsi->io_queues[HCTX_TYPE_POLL] = num_poll_vqs; + + dev_info(&vdev->dev, "%d/%d/%d default/read/poll queues\n", + vscsi->io_queues[HCTX_TYPE_DEFAULT], + vscsi->io_queues[HCTX_TYPE_READ], + vscsi->io_queues[HCTX_TYPE_POLL]); + callbacks[0] = virtscsi_ctrl_done; callbacks[1] = virtscsi_event_done; names[0] = "control"; names[1] = "event"; - for (i = VIRTIO_SCSI_VQ_BASE; i < num_vqs; i++) { + for (i = VIRTIO_SCSI_VQ_BASE; i < num_vqs - num_poll_vqs; i++) { callbacks[i] = virtscsi_req_done; names[i] = "request"; } + for (; i < num_vqs; i++) { + callbacks[i] = NULL; + names[i] = "request_poll"; + } + /* Discover virtqueues and write information to configuration. 
*/ err = virtio_find_vqs(vdev, num_vqs, vqs, callbacks, names, &desc); if (err) @@ -874,6 +938,7 @@ static int virtscsi_probe(struct virtio_device *vdev) sg_elems = virtscsi_config_get(vdev, seg_max) ?: 1; shost->sg_tablesize = sg_elems; + shost->nr_maps = 1; vscsi = shost_priv(shost); vscsi->vdev = vdev; vscsi->num_queues = num_queues; @@ -883,6 +948,9 @@ static int virtscsi_probe(struct virtio_device *vdev) if (err) goto virtscsi_init_failed; + if (vscsi->io_queues[HCTX_TYPE_POLL]) + shost->nr_maps = HCTX_TYPE_POLL + 1; + shost->can_queue = virtqueue_get_vring_size(vscsi->req_vqs[0].vq); cmd_per_lun = virtscsi_config_get(vdev, cmd_per_lun) ?: 1; diff --git a/drivers/soundwire/amd_manager.c b/drivers/soundwire/amd_manager.c index 3a99f6dcdfafa24b9115ea7358837a57aa95a423..f54bb4dd2d1016a5c18abd53356ee02bf101a19a 100644 --- a/drivers/soundwire/amd_manager.c +++ b/drivers/soundwire/amd_manager.c @@ -927,6 +927,14 @@ static int amd_sdw_manager_probe(struct platform_device *pdev) amd_manager->bus.clk_stop_timeout = 200; amd_manager->bus.link_id = amd_manager->instance; + /* + * Due to BIOS compatibility, the two links are exposed within + * the scope of a single controller. If this changes, the + * controller_id will have to be updated with drv_data + * information. 
+ */ + amd_manager->bus.controller_id = 0; + switch (amd_manager->instance) { case ACP_SDW0: amd_manager->num_dout_ports = AMD_SDW0_MAX_TX_PORTS; @@ -942,13 +950,13 @@ static int amd_sdw_manager_probe(struct platform_device *pdev) amd_manager->reg_mask = &sdw_manager_reg_mask_array[amd_manager->instance]; params = &amd_manager->bus.params; - params->max_dr_freq = AMD_SDW_DEFAULT_CLK_FREQ * 2; - params->curr_dr_freq = AMD_SDW_DEFAULT_CLK_FREQ * 2; + params->col = AMD_SDW_DEFAULT_COLUMNS; params->row = AMD_SDW_DEFAULT_ROWS; prop = &amd_manager->bus.prop; prop->clk_freq = &amd_sdw_freq_tbl[0]; prop->mclk_freq = AMD_SDW_BUS_BASE_FREQ; + prop->max_clk_freq = AMD_SDW_DEFAULT_CLK_FREQ; ret = sdw_bus_master_add(&amd_manager->bus, dev, dev->fwnode); if (ret) { diff --git a/drivers/soundwire/bus.c b/drivers/soundwire/bus.c index 41b0d9adf68ef349053b0fe8317bc9817eba4f31..f3fec15c311229f7a1da48bfb3de416ed52a95d6 100644 --- a/drivers/soundwire/bus.c +++ b/drivers/soundwire/bus.c @@ -22,6 +22,10 @@ static int sdw_get_id(struct sdw_bus *bus) return rc; bus->id = rc; + + if (bus->controller_id == -1) + bus->controller_id = rc; + return 0; } diff --git a/drivers/soundwire/debugfs.c b/drivers/soundwire/debugfs.c index d1553cb77187471b40e4601f3cd5399e241ad076..67abd7e52f092a988a2e3f20e253dd76c53b178f 100644 --- a/drivers/soundwire/debugfs.c +++ b/drivers/soundwire/debugfs.c @@ -20,7 +20,7 @@ void sdw_bus_debugfs_init(struct sdw_bus *bus) return; /* create the debugfs master-N */ - snprintf(name, sizeof(name), "master-%d-%d", bus->id, bus->link_id); + snprintf(name, sizeof(name), "master-%d-%d", bus->controller_id, bus->link_id); bus->debugfs = debugfs_create_dir(name, sdw_debugfs_root); } diff --git a/drivers/soundwire/generic_bandwidth_allocation.c b/drivers/soundwire/generic_bandwidth_allocation.c index 31162f2b563811569dcf1bec7a0fc0757efa72f4..c70a63d009ae4b0ae4184952540f3a48b2b400d3 100644 --- a/drivers/soundwire/generic_bandwidth_allocation.c +++ 
b/drivers/soundwire/generic_bandwidth_allocation.c @@ -333,7 +333,7 @@ static int sdw_select_row_col(struct sdw_bus *bus, int clk_freq) */ static int sdw_compute_bus_params(struct sdw_bus *bus) { - unsigned int max_dr_freq, curr_dr_freq = 0; + unsigned int curr_dr_freq = 0; struct sdw_master_prop *mstr_prop = &bus->prop; int i, clk_values, ret; bool is_gear = false; @@ -351,14 +351,12 @@ static int sdw_compute_bus_params(struct sdw_bus *bus) clk_buf = NULL; } - max_dr_freq = mstr_prop->max_clk_freq * SDW_DOUBLE_RATE_FACTOR; - for (i = 0; i < clk_values; i++) { if (!clk_buf) - curr_dr_freq = max_dr_freq; + curr_dr_freq = bus->params.max_dr_freq; else curr_dr_freq = (is_gear) ? - (max_dr_freq >> clk_buf[i]) : + (bus->params.max_dr_freq >> clk_buf[i]) : clk_buf[i] * SDW_DOUBLE_RATE_FACTOR; if (curr_dr_freq <= bus->params.bandwidth) diff --git a/drivers/soundwire/intel_auxdevice.c b/drivers/soundwire/intel_auxdevice.c index 7f15e3549e539d053a82b63b8346a362c6d0145c..93698532deac4098706099dfceaf85055f563b09 100644 --- a/drivers/soundwire/intel_auxdevice.c +++ b/drivers/soundwire/intel_auxdevice.c @@ -234,6 +234,9 @@ static int intel_link_probe(struct auxiliary_device *auxdev, cdns->instance = sdw->instance; cdns->msg_count = 0; + /* single controller for all SoundWire links */ + bus->controller_id = 0; + bus->link_id = auxdev->id; bus->clk_stop_timeout = 1; diff --git a/drivers/soundwire/master.c b/drivers/soundwire/master.c index 9b05c9e25ebe48a7d135ff45ad4b3af0fb1cb2aa..51abedbbaa6630e53ab301b85d6dcefe2f8ce9ac 100644 --- a/drivers/soundwire/master.c +++ b/drivers/soundwire/master.c @@ -145,7 +145,7 @@ int sdw_master_device_add(struct sdw_bus *bus, struct device *parent, md->dev.fwnode = fwnode; md->dev.dma_mask = parent->dma_mask; - dev_set_name(&md->dev, "sdw-master-%d", bus->id); + dev_set_name(&md->dev, "sdw-master-%d-%d", bus->controller_id, bus->link_id); ret = device_register(&md->dev); if (ret) { diff --git a/drivers/soundwire/qcom.c b/drivers/soundwire/qcom.c 
index 8076d40407d4d2647dcc658b760cdeddd02c3a21..3c4d6debab1f36b2648174071b7ad153ca4aac09 100644 --- a/drivers/soundwire/qcom.c +++ b/drivers/soundwire/qcom.c @@ -1157,9 +1157,20 @@ static int qcom_swrm_stream_alloc_ports(struct qcom_swrm_ctrl *ctrl, struct sdw_port_runtime *p_rt; struct sdw_slave *slave; unsigned long *port_mask; - int i, maxport, pn, nports = 0, ret = 0; + int maxport, pn, nports = 0, ret = 0; unsigned int m_port; + if (direction == SNDRV_PCM_STREAM_CAPTURE) + sconfig.direction = SDW_DATA_DIR_TX; + else + sconfig.direction = SDW_DATA_DIR_RX; + + /* hw parameters wil be ignored as we only support PDM */ + sconfig.ch_count = 1; + sconfig.frame_rate = params_rate(params); + sconfig.type = stream->type; + sconfig.bps = 1; + mutex_lock(&ctrl->port_lock); list_for_each_entry(m_rt, &stream->master_list, stream_node) { if (m_rt->direction == SDW_DATA_DIR_RX) { @@ -1183,7 +1194,7 @@ static int qcom_swrm_stream_alloc_ports(struct qcom_swrm_ctrl *ctrl, if (pn > maxport) { dev_err(ctrl->dev, "All ports busy\n"); ret = -EBUSY; - goto err; + goto out; } set_bit(pn, port_mask); pconfig[nports].num = pn; @@ -1193,24 +1204,9 @@ static int qcom_swrm_stream_alloc_ports(struct qcom_swrm_ctrl *ctrl, } } - if (direction == SNDRV_PCM_STREAM_CAPTURE) - sconfig.direction = SDW_DATA_DIR_TX; - else - sconfig.direction = SDW_DATA_DIR_RX; - - /* hw parameters wil be ignored as we only support PDM */ - sconfig.ch_count = 1; - sconfig.frame_rate = params_rate(params); - sconfig.type = stream->type; - sconfig.bps = 1; sdw_stream_add_master(&ctrl->bus, &sconfig, pconfig, nports, stream); -err: - if (ret) { - for (i = 0; i < nports; i++) - clear_bit(pconfig[i].num, port_mask); - } - +out: mutex_unlock(&ctrl->port_lock); return ret; @@ -1593,6 +1589,13 @@ static int qcom_swrm_probe(struct platform_device *pdev) } } + ctrl->bus.controller_id = -1; + + if (ctrl->version > SWRM_VERSION_1_3_0) { + ctrl->reg_read(ctrl, SWRM_COMP_MASTER_ID, &val); + ctrl->bus.controller_id = val; + } + 
ret = sdw_bus_master_add(&ctrl->bus, dev, dev->fwnode); if (ret) { dev_err(dev, "Failed to register Soundwire controller (%d)\n", diff --git a/drivers/soundwire/slave.c b/drivers/soundwire/slave.c index c1c1a2ac293af4eed496edf98c89178eee5f9818..060c2982e26b009d561eafcbf81b888ada781fd4 100644 --- a/drivers/soundwire/slave.c +++ b/drivers/soundwire/slave.c @@ -39,14 +39,14 @@ int sdw_slave_add(struct sdw_bus *bus, slave->dev.fwnode = fwnode; if (id->unique_id == SDW_IGNORED_UNIQUE_ID) { - /* name shall be sdw:link:mfg:part:class */ - dev_set_name(&slave->dev, "sdw:%01x:%04x:%04x:%02x", - bus->link_id, id->mfg_id, id->part_id, + /* name shall be sdw:ctrl:link:mfg:part:class */ + dev_set_name(&slave->dev, "sdw:%01x:%01x:%04x:%04x:%02x", + bus->controller_id, bus->link_id, id->mfg_id, id->part_id, id->class_id); } else { - /* name shall be sdw:link:mfg:part:class:unique */ - dev_set_name(&slave->dev, "sdw:%01x:%04x:%04x:%02x:%01x", - bus->link_id, id->mfg_id, id->part_id, + /* name shall be sdw:ctrl:link:mfg:part:class:unique */ + dev_set_name(&slave->dev, "sdw:%01x:%01x:%04x:%04x:%02x:%01x", + bus->controller_id, bus->link_id, id->mfg_id, id->part_id, id->class_id, id->unique_id); } diff --git a/drivers/soundwire/stream.c b/drivers/soundwire/stream.c index f048b3d55b2edcb32269369aa7c2b5dda07414ea..f9c0adc0738db27a7578509edbfbcd82c60b5206 100644 --- a/drivers/soundwire/stream.c +++ b/drivers/soundwire/stream.c @@ -898,7 +898,7 @@ static struct sdw_port_runtime *sdw_port_alloc(struct list_head *port_list) } static int sdw_port_config(struct sdw_port_runtime *p_rt, - struct sdw_port_config *port_config, + const struct sdw_port_config *port_config, int port_index) { p_rt->ch_mask = port_config[port_index].ch_mask; @@ -971,7 +971,7 @@ static int sdw_slave_port_is_valid_range(struct device *dev, int num) static int sdw_slave_port_config(struct sdw_slave *slave, struct sdw_slave_runtime *s_rt, - struct sdw_port_config *port_config) + const struct sdw_port_config *port_config) 
{ struct sdw_port_runtime *p_rt; int ret; @@ -1027,7 +1027,7 @@ static int sdw_master_port_alloc(struct sdw_master_runtime *m_rt, } static int sdw_master_port_config(struct sdw_master_runtime *m_rt, - struct sdw_port_config *port_config) + const struct sdw_port_config *port_config) { struct sdw_port_runtime *p_rt; int ret; @@ -1862,7 +1862,7 @@ EXPORT_SYMBOL(sdw_release_stream); */ int sdw_stream_add_master(struct sdw_bus *bus, struct sdw_stream_config *stream_config, - struct sdw_port_config *port_config, + const struct sdw_port_config *port_config, unsigned int num_ports, struct sdw_stream_runtime *stream) { @@ -1982,7 +1982,7 @@ EXPORT_SYMBOL(sdw_stream_remove_master); */ int sdw_stream_add_slave(struct sdw_slave *slave, struct sdw_stream_config *stream_config, - struct sdw_port_config *port_config, + const struct sdw_port_config *port_config, unsigned int num_ports, struct sdw_stream_runtime *stream) { diff --git a/drivers/spi/spi-coldfire-qspi.c b/drivers/spi/spi-coldfire-qspi.c index f0b630fe16c3c8a79480b6f73718dc5f2a98b649..b341b6908df06db192ff5e7f5590f839ac9c3978 100644 --- a/drivers/spi/spi-coldfire-qspi.c +++ b/drivers/spi/spi-coldfire-qspi.c @@ -441,7 +441,6 @@ static void mcfqspi_remove(struct platform_device *pdev) mcfqspi_wr_qmr(mcfqspi, MCFQSPI_QMR_MSTR); mcfqspi_cs_teardown(mcfqspi); - clk_disable_unprepare(mcfqspi->clk); } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/staging/greybus/i2c.c b/drivers/staging/greybus/i2c.c index de2f6516da09592807b6657884d05fec6dcca5d7..22325ab9d6521d686f839be3945ba9e37e8dd268 100644 --- a/drivers/staging/greybus/i2c.c +++ b/drivers/staging/greybus/i2c.c @@ -264,7 +264,7 @@ static int gb_i2c_probe(struct gbphy_device *gbphy_dev, /* Looks good; up our i2c adapter */ adapter = &gb_i2c_dev->adapter; adapter->owner = THIS_MODULE; - adapter->class = I2C_CLASS_HWMON | I2C_CLASS_SPD; + adapter->class = I2C_CLASS_HWMON; adapter->algo = &gb_i2c_algorithm; adapter->dev.parent = &gbphy_dev->dev; diff --git 
a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 506193e870c49159b2a8ba8c5b07ec98e6084407..7a85e6477e4655e3cae5bab15aed90116dc31a94 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -147,7 +147,6 @@ int transport_lookup_tmr_lun(struct se_cmd *se_cmd) struct se_session *se_sess = se_cmd->se_sess; struct se_node_acl *nacl = se_sess->se_node_acl; struct se_tmr_req *se_tmr = se_cmd->se_tmr_req; - unsigned long flags; rcu_read_lock(); deve = target_nacl_find_deve(nacl, se_cmd->orig_fe_lun); @@ -178,10 +177,6 @@ out_unlock: se_cmd->se_dev = rcu_dereference_raw(se_lun->lun_se_dev); se_tmr->tmr_dev = rcu_dereference_raw(se_lun->lun_se_dev); - spin_lock_irqsave(&se_tmr->tmr_dev->se_tmr_lock, flags); - list_add_tail(&se_tmr->tmr_list, &se_tmr->tmr_dev->dev_tmr_list); - spin_unlock_irqrestore(&se_tmr->tmr_dev->se_tmr_lock, flags); - return 0; } EXPORT_SYMBOL(transport_lookup_tmr_lun); diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 670cfb7bd426ac677d15e0a9ae2b63f5c716c27a..73d0d6133ac8f2860323a98662db7a21acfe2d6c 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -3629,6 +3629,10 @@ int transport_generic_handle_tmr( unsigned long flags; bool aborted = false; + spin_lock_irqsave(&cmd->se_dev->se_tmr_lock, flags); + list_add_tail(&cmd->se_tmr_req->tmr_list, &cmd->se_dev->dev_tmr_list); + spin_unlock_irqrestore(&cmd->se_dev->se_tmr_lock, flags); + spin_lock_irqsave(&cmd->t_state_lock, flags); if (cmd->transport_state & CMD_T_ABORTED) { aborted = true; diff --git a/drivers/thermal/loongson2_thermal.c b/drivers/thermal/loongson2_thermal.c index 99ca0c7bc41c790a50cd9b5e7f10344189f0eae6..0f475fe46bc9dc4db106abb4b2a52fe5286b78e9 100644 --- a/drivers/thermal/loongson2_thermal.c +++ b/drivers/thermal/loongson2_thermal.c @@ -8,9 +8,10 @@ #include #include #include +#include #include -#include #include +#include 
#include #include #include "thermal_hwmon.h" diff --git a/drivers/tty/hvc/Kconfig b/drivers/tty/hvc/Kconfig index 4f9264d005c06d55e5853bf9b12e329907648ca3..6e05c5c7bca1ad258502eaf158b94534c1cdd23d 100644 --- a/drivers/tty/hvc/Kconfig +++ b/drivers/tty/hvc/Kconfig @@ -108,7 +108,7 @@ config HVC_DCC_SERIALIZE_SMP config HVC_RISCV_SBI bool "RISC-V SBI console support" - depends on RISCV_SBI_V01 + depends on RISCV_SBI select HVC_DRIVER help This enables support for console output via RISC-V SBI calls, which diff --git a/drivers/tty/hvc/hvc_riscv_sbi.c b/drivers/tty/hvc/hvc_riscv_sbi.c index a72591279f865845b6508db585b72880ff578e6d..cede8a57259492bfc15fa8ff36286371c8c6139d 100644 --- a/drivers/tty/hvc/hvc_riscv_sbi.c +++ b/drivers/tty/hvc/hvc_riscv_sbi.c @@ -40,21 +40,44 @@ static ssize_t hvc_sbi_tty_get(uint32_t vtermno, u8 *buf, size_t count) return i; } -static const struct hv_ops hvc_sbi_ops = { +static const struct hv_ops hvc_sbi_v01_ops = { .get_chars = hvc_sbi_tty_get, .put_chars = hvc_sbi_tty_put, }; -static int __init hvc_sbi_init(void) +static ssize_t hvc_sbi_dbcn_tty_put(uint32_t vtermno, const u8 *buf, size_t count) { - return PTR_ERR_OR_ZERO(hvc_alloc(0, 0, &hvc_sbi_ops, 16)); + return sbi_debug_console_write(buf, count); } -device_initcall(hvc_sbi_init); -static int __init hvc_sbi_console_init(void) +static ssize_t hvc_sbi_dbcn_tty_get(uint32_t vtermno, u8 *buf, size_t count) { - hvc_instantiate(0, 0, &hvc_sbi_ops); + return sbi_debug_console_read(buf, count); +} + +static const struct hv_ops hvc_sbi_dbcn_ops = { + .put_chars = hvc_sbi_dbcn_tty_put, + .get_chars = hvc_sbi_dbcn_tty_get, +}; + +static int __init hvc_sbi_init(void) +{ + int err; + + if (sbi_debug_console_available) { + err = PTR_ERR_OR_ZERO(hvc_alloc(0, 0, &hvc_sbi_dbcn_ops, 256)); + if (err) + return err; + hvc_instantiate(0, 0, &hvc_sbi_dbcn_ops); + } else if (IS_ENABLED(CONFIG_RISCV_SBI_V01)) { + err = PTR_ERR_OR_ZERO(hvc_alloc(0, 0, &hvc_sbi_v01_ops, 256)); + if (err) + return err; + 
hvc_instantiate(0, 0, &hvc_sbi_v01_ops); + } else { + return -ENODEV; + } return 0; } -console_initcall(hvc_sbi_console_init); +device_initcall(hvc_sbi_init); diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig index 63a4b75ff264bacbbacb4e3da05a77ade29e61bc..ce0e99b6762f9181cb1eeeda31d1937e41e57bfa 100644 --- a/drivers/tty/serial/Kconfig +++ b/drivers/tty/serial/Kconfig @@ -87,7 +87,7 @@ config SERIAL_EARLYCON_SEMIHOST config SERIAL_EARLYCON_RISCV_SBI bool "Early console using RISC-V SBI" - depends on RISCV_SBI_V01 + depends on RISCV_SBI select SERIAL_CORE select SERIAL_CORE_CONSOLE select SERIAL_EARLYCON diff --git a/drivers/tty/serial/earlycon-riscv-sbi.c b/drivers/tty/serial/earlycon-riscv-sbi.c index 27afb0b74ea705bebbc3895b577ddc39b4eda6b6..0162155f0c83976e5aa9b014974fdf413f3b4a45 100644 --- a/drivers/tty/serial/earlycon-riscv-sbi.c +++ b/drivers/tty/serial/earlycon-riscv-sbi.c @@ -15,17 +15,38 @@ static void sbi_putc(struct uart_port *port, unsigned char c) sbi_console_putchar(c); } -static void sbi_console_write(struct console *con, - const char *s, unsigned n) +static void sbi_0_1_console_write(struct console *con, + const char *s, unsigned int n) { struct earlycon_device *dev = con->data; uart_console_write(&dev->port, s, n, sbi_putc); } +static void sbi_dbcn_console_write(struct console *con, + const char *s, unsigned int n) +{ + int ret; + + while (n) { + ret = sbi_debug_console_write(s, n); + if (ret < 0) + break; + + s += ret; + n -= ret; + } +} + static int __init early_sbi_setup(struct earlycon_device *device, const char *opt) { - device->con->write = sbi_console_write; + if (sbi_debug_console_available) + device->con->write = sbi_dbcn_console_write; + else if (IS_ENABLED(CONFIG_RISCV_SBI_V01)) + device->con->write = sbi_0_1_console_write; + else + return -ENODEV; + return 0; } EARLYCON_DECLARE(sbi, early_sbi_setup); diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 
8530f1d668a755873196a53f09f2eb1ea524d34e..d52b3c5e66b769d14107043389a75b54a112e83d 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -8758,7 +8758,6 @@ static int ufshcd_add_lus(struct ufs_hba *hba) ufs_bsg_probe(hba); scsi_scan_host(hba->host); - pm_runtime_put_sync(hba->dev); out: return ret; @@ -9027,15 +9026,12 @@ static void ufshcd_async_scan(void *data, async_cookie_t cookie) /* Probe and add UFS logical units */ ret = ufshcd_add_lus(hba); + out: - /* - * If we failed to initialize the device or the device is not - * present, turn off the power/clocks etc. - */ - if (ret) { - pm_runtime_put_sync(hba->dev); - ufshcd_hba_exit(hba); - } + pm_runtime_put_sync(hba->dev); + + if (ret) + dev_err(hba->dev, "%s failed: %d\n", __func__, ret); } static enum scsi_timeout_action ufshcd_eh_timed_out(struct scsi_cmnd *scmd) diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c index 480787048e752929d9b255cde85feb9d629292f1..39eef470f8fa5b88b41450aeec8833a619899e34 100644 --- a/drivers/ufs/host/ufs-qcom.c +++ b/drivers/ufs/host/ufs-qcom.c @@ -1716,7 +1716,7 @@ static int ufs_qcom_config_esi(struct ufs_hba *hba) ufs_qcom_write_msi_msg); if (ret) { dev_err(hba->dev, "Failed to request Platform MSI %d\n", ret); - goto out; + return ret; } msi_lock_descs(hba->dev); @@ -1750,11 +1750,8 @@ static int ufs_qcom_config_esi(struct ufs_hba *hba) FIELD_PREP(ESI_VEC_MASK, MAX_ESI_VEC - 1), REG_UFS_CFG3); ufshcd_mcq_enable_esi(hba); - } - -out: - if (!ret) host->esi_enabled = true; + } return ret; } diff --git a/drivers/vdpa/alibaba/eni_vdpa.c b/drivers/vdpa/alibaba/eni_vdpa.c index 5a09a09cca709034503250c1ff64beb26fa35b3d..cce3d1837104c34f0bf331db33d5a81f64c0a6e7 100644 --- a/drivers/vdpa/alibaba/eni_vdpa.c +++ b/drivers/vdpa/alibaba/eni_vdpa.c @@ -497,7 +497,7 @@ static int eni_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (!eni_vdpa->vring) { ret = -ENOMEM; ENI_ERR(pdev, "failed to allocate virtqueues\n"); - goto err; + 
goto err_remove_vp_legacy; } for (i = 0; i < eni_vdpa->queues; i++) { @@ -509,11 +509,13 @@ static int eni_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id) ret = vdpa_register_device(&eni_vdpa->vdpa, eni_vdpa->queues); if (ret) { ENI_ERR(pdev, "failed to register to vdpa bus\n"); - goto err; + goto err_remove_vp_legacy; } return 0; +err_remove_vp_legacy: + vp_legacy_remove(&eni_vdpa->ldev); err: put_device(&eni_vdpa->vdpa.dev); return ret; diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h index 84547d998bcf3b1a0305e489ff9cbf5b65c276eb..50aac8fe57ef57184c31e6080b33c3b543a635e4 100644 --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h @@ -35,6 +35,9 @@ struct mlx5_vdpa_mr { struct vhost_iotlb *iotlb; bool user_mr; + + refcount_t refcount; + struct list_head mr_list; }; struct mlx5_vdpa_resources { @@ -93,6 +96,7 @@ struct mlx5_vdpa_dev { u32 generation; struct mlx5_vdpa_mr *mr[MLX5_VDPA_NUM_AS]; + struct list_head mr_list_head; /* serialize mr access */ struct mutex mr_mtx; struct mlx5_control_vq cvq; @@ -118,8 +122,10 @@ int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, u32 mkey); struct mlx5_vdpa_mr *mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb); void mlx5_vdpa_destroy_mr_resources(struct mlx5_vdpa_dev *mvdev); -void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev, - struct mlx5_vdpa_mr *mr); +void mlx5_vdpa_get_mr(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_mr *mr); +void mlx5_vdpa_put_mr(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_mr *mr); void mlx5_vdpa_update_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr, unsigned int asid); diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c index 2197c46e563a1f13414588260e3fda3195f1052c..4758914ccf860838bb32c0bd3ae23d7280ce9ea5 100644 --- a/drivers/vdpa/mlx5/core/mr.c +++ b/drivers/vdpa/mlx5/core/mr.c @@ -498,32 +498,54 @@ static void destroy_user_mr(struct mlx5_vdpa_dev 
*mvdev, struct mlx5_vdpa_mr *mr static void _mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) { + if (WARN_ON(!mr)) + return; + if (mr->user_mr) destroy_user_mr(mvdev, mr); else destroy_dma_mr(mvdev, mr); vhost_iotlb_free(mr->iotlb); + + list_del(&mr->mr_list); + + kfree(mr); } -void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev, - struct mlx5_vdpa_mr *mr) +static void _mlx5_vdpa_put_mr(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_mr *mr) { if (!mr) return; + if (refcount_dec_and_test(&mr->refcount)) + _mlx5_vdpa_destroy_mr(mvdev, mr); +} + +void mlx5_vdpa_put_mr(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_mr *mr) +{ mutex_lock(&mvdev->mr_mtx); + _mlx5_vdpa_put_mr(mvdev, mr); + mutex_unlock(&mvdev->mr_mtx); +} - _mlx5_vdpa_destroy_mr(mvdev, mr); +static void _mlx5_vdpa_get_mr(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_mr *mr) +{ + if (!mr) + return; - for (int i = 0; i < MLX5_VDPA_NUM_AS; i++) { - if (mvdev->mr[i] == mr) - mvdev->mr[i] = NULL; - } + refcount_inc(&mr->refcount); +} +void mlx5_vdpa_get_mr(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_mr *mr) +{ + mutex_lock(&mvdev->mr_mtx); + _mlx5_vdpa_get_mr(mvdev, mr); mutex_unlock(&mvdev->mr_mtx); - - kfree(mr); } void mlx5_vdpa_update_mr(struct mlx5_vdpa_dev *mvdev, @@ -534,10 +556,23 @@ void mlx5_vdpa_update_mr(struct mlx5_vdpa_dev *mvdev, mutex_lock(&mvdev->mr_mtx); + _mlx5_vdpa_put_mr(mvdev, old_mr); mvdev->mr[asid] = new_mr; - if (old_mr) { - _mlx5_vdpa_destroy_mr(mvdev, old_mr); - kfree(old_mr); + + mutex_unlock(&mvdev->mr_mtx); +} + +static void mlx5_vdpa_show_mr_leaks(struct mlx5_vdpa_dev *mvdev) +{ + struct mlx5_vdpa_mr *mr; + + mutex_lock(&mvdev->mr_mtx); + + list_for_each_entry(mr, &mvdev->mr_list_head, mr_list) { + + mlx5_vdpa_warn(mvdev, "mkey still alive after resource delete: " + "mr: %p, mkey: 0x%x, refcount: %u\n", + mr, mr->mkey, refcount_read(&mr->refcount)); } mutex_unlock(&mvdev->mr_mtx); @@ -547,9 +582,11 @@ void mlx5_vdpa_update_mr(struct 
mlx5_vdpa_dev *mvdev, void mlx5_vdpa_destroy_mr_resources(struct mlx5_vdpa_dev *mvdev) { for (int i = 0; i < MLX5_VDPA_NUM_AS; i++) - mlx5_vdpa_destroy_mr(mvdev, mvdev->mr[i]); + mlx5_vdpa_update_mr(mvdev, NULL, i); prune_iotlb(mvdev->cvq.iotlb); + + mlx5_vdpa_show_mr_leaks(mvdev); } static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, @@ -576,6 +613,8 @@ static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, if (err) goto err_iotlb; + list_add_tail(&mr->mr_list, &mvdev->mr_list_head); + return 0; err_iotlb: @@ -607,6 +646,8 @@ struct mlx5_vdpa_mr *mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, if (err) goto out_err; + refcount_set(&mr->refcount, 1); + return mr; out_err: @@ -651,7 +692,7 @@ int mlx5_vdpa_reset_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid) if (asid >= MLX5_VDPA_NUM_AS) return -EINVAL; - mlx5_vdpa_destroy_mr(mvdev, mvdev->mr[asid]); + mlx5_vdpa_update_mr(mvdev, NULL, asid); if (asid == 0 && MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) { if (mlx5_vdpa_create_dma_mr(mvdev)) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 26ba7da6b410621ea72e65d4bb90bd192e06dbda..778821bab7d93e3393440656306e60cfae52d096 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -120,6 +120,12 @@ struct mlx5_vdpa_virtqueue { u16 avail_idx; u16 used_idx; int fw_state; + + u64 modified_fields; + + struct mlx5_vdpa_mr *vq_mr; + struct mlx5_vdpa_mr *desc_mr; + struct msi_map map; /* keep last in the struct */ @@ -943,6 +949,14 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque kfree(in); mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + mlx5_vdpa_get_mr(mvdev, vq_mr); + mvq->vq_mr = vq_mr; + + if (vq_desc_mr && MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported)) { + mlx5_vdpa_get_mr(mvdev, vq_desc_mr); + mvq->desc_mr = vq_desc_mr; + } + return 0; err_cmd: @@ -969,6 +983,12 @@ static void destroy_virtqueue(struct 
mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtq } mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE; umems_destroy(ndev, mvq); + + mlx5_vdpa_put_mr(&ndev->mvdev, mvq->vq_mr); + mvq->vq_mr = NULL; + + mlx5_vdpa_put_mr(&ndev->mvdev, mvq->desc_mr); + mvq->desc_mr = NULL; } static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw) @@ -1167,7 +1187,12 @@ err_cmd: return err; } -static bool is_valid_state_change(int oldstate, int newstate) +static bool is_resumable(struct mlx5_vdpa_net *ndev) +{ + return ndev->mvdev.vdev.config->resume; +} + +static bool is_valid_state_change(int oldstate, int newstate, bool resumable) { switch (oldstate) { case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT: @@ -1175,25 +1200,43 @@ static bool is_valid_state_change(int oldstate, int newstate) case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY: return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND; case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND: + return resumable ? newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY : false; case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR: default: return false; } } -static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state) +static bool modifiable_virtqueue_fields(struct mlx5_vdpa_virtqueue *mvq) +{ + /* Only state is always modifiable */ + if (mvq->modified_fields & ~MLX5_VIRTQ_MODIFY_MASK_STATE) + return mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT || + mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND; + + return true; +} + +static int modify_virtqueue(struct mlx5_vdpa_net *ndev, + struct mlx5_vdpa_virtqueue *mvq, + int state) { int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in); u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {}; + struct mlx5_vdpa_dev *mvdev = &ndev->mvdev; + struct mlx5_vdpa_mr *desc_mr = NULL; + struct mlx5_vdpa_mr *vq_mr = NULL; + bool state_change = false; void *obj_context; void *cmd_hdr; + void *vq_ctx; void *in; int err; if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE) return 0; - if 
(!is_valid_state_change(mvq->fw_state, state)) + if (!modifiable_virtqueue_fields(mvq)) return -EINVAL; in = kzalloc(inlen, GFP_KERNEL); @@ -1208,17 +1251,83 @@ static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context); - MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select, - MLX5_VIRTQ_MODIFY_MASK_STATE); - MLX5_SET(virtio_net_q_object, obj_context, state, state); + vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context); + + if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_STATE) { + if (!is_valid_state_change(mvq->fw_state, state, is_resumable(ndev))) { + err = -EINVAL; + goto done; + } + + MLX5_SET(virtio_net_q_object, obj_context, state, state); + state_change = true; + } + + if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS) { + MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr); + MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr); + MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr); + } + + if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX) + MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx); + + if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX) + MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx); + + if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY) { + vq_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]]; + + if (vq_mr) + MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, vq_mr->mkey); + else + mvq->modified_fields &= ~MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY; + } + + if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY) { + desc_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]]; + + if (desc_mr && MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported)) + MLX5_SET(virtio_q, vq_ctx, 
desc_group_mkey, desc_mr->mkey); + else + mvq->modified_fields &= ~MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY; + } + + MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select, mvq->modified_fields); err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out)); - kfree(in); - if (!err) + if (err) + goto done; + + if (state_change) mvq->fw_state = state; + if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY) { + mlx5_vdpa_put_mr(mvdev, mvq->vq_mr); + mlx5_vdpa_get_mr(mvdev, vq_mr); + mvq->vq_mr = vq_mr; + } + + if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY) { + mlx5_vdpa_put_mr(mvdev, mvq->desc_mr); + mlx5_vdpa_get_mr(mvdev, desc_mr); + mvq->desc_mr = desc_mr; + } + + mvq->modified_fields = 0; + +done: + kfree(in); return err; } +static int modify_virtqueue_state(struct mlx5_vdpa_net *ndev, + struct mlx5_vdpa_virtqueue *mvq, + unsigned int state) +{ + mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_STATE; + return modify_virtqueue(ndev, mvq, state); +} + static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) { u32 in[MLX5_ST_SZ_DW(create_virtio_q_counters_in)] = {}; @@ -1347,7 +1456,7 @@ static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) goto err_vq; if (mvq->ready) { - err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY); + err = modify_virtqueue_state(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY); if (err) { mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n", idx, err); @@ -1382,7 +1491,7 @@ static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *m if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) return; - if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND)) + if (modify_virtqueue_state(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND)) mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n"); if (query_virtqueue(ndev, mvq, &attr)) { @@ -1401,12 +1510,31 @@ static 
void suspend_vqs(struct mlx5_vdpa_net *ndev) suspend_vq(ndev, &ndev->vqs[i]); } +static void resume_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) +{ + if (!mvq->initialized || !is_resumable(ndev)) + return; + + if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND) + return; + + if (modify_virtqueue_state(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)) + mlx5_vdpa_warn(&ndev->mvdev, "modify to resume failed for vq %u\n", mvq->index); +} + +static void resume_vqs(struct mlx5_vdpa_net *ndev) +{ + for (int i = 0; i < ndev->mvdev.max_vqs; i++) + resume_vq(ndev, &ndev->vqs[i]); +} + static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) { if (!mvq->initialized) return; suspend_vq(ndev, mvq); + mvq->modified_fields = 0; destroy_virtqueue(ndev, mvq); dealloc_vector(ndev, mvq); counter_set_dealloc(ndev, mvq); @@ -2138,6 +2266,7 @@ static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_ mvq->desc_addr = desc_area; mvq->device_addr = device_area; mvq->driver_addr = driver_area; + mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS; return 0; } @@ -2207,7 +2336,7 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready if (!ready) { suspend_vq(ndev, mvq); } else { - err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY); + err = modify_virtqueue_state(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY); if (err) { mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err); ready = false; @@ -2255,6 +2384,8 @@ static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx, mvq->used_idx = state->split.avail_index; mvq->avail_idx = state->split.avail_index; + mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX | + MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX; return 0; } @@ -2703,24 +2834,35 @@ static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, unsigned int asid) { struct mlx5_vdpa_net *ndev = 
to_mlx5_vdpa_ndev(mvdev); + bool teardown = !is_resumable(ndev); int err; suspend_vqs(ndev); - err = save_channels_info(ndev); - if (err) - return err; + if (teardown) { + err = save_channels_info(ndev); + if (err) + return err; - teardown_driver(ndev); + teardown_driver(ndev); + } mlx5_vdpa_update_mr(mvdev, new_mr, asid); + for (int i = 0; i < ndev->cur_num_vqs; i++) + ndev->vqs[i].modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY | + MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY; + if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) || mvdev->suspended) return 0; - restore_channels_info(ndev); - err = setup_driver(mvdev); - if (err) - return err; + if (teardown) { + restore_channels_info(ndev); + err = setup_driver(mvdev); + if (err) + return err; + } + + resume_vqs(ndev); return 0; } @@ -2804,8 +2946,10 @@ static void clear_vqs_ready(struct mlx5_vdpa_net *ndev) { int i; - for (i = 0; i < ndev->mvdev.max_vqs; i++) + for (i = 0; i < ndev->mvdev.max_vqs; i++) { ndev->vqs[i].ready = false; + ndev->vqs[i].modified_fields = 0; + } ndev->mvdev.cvq.ready = false; } @@ -2982,7 +3126,7 @@ static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, return mlx5_vdpa_update_cvq_iotlb(mvdev, iotlb, asid); out_err: - mlx5_vdpa_destroy_mr(mvdev, new_mr); + mlx5_vdpa_put_mr(mvdev, new_mr); return err; } @@ -3229,6 +3373,23 @@ static int mlx5_vdpa_suspend(struct vdpa_device *vdev) return 0; } +static int mlx5_vdpa_resume(struct vdpa_device *vdev) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev; + + ndev = to_mlx5_vdpa_ndev(mvdev); + + mlx5_vdpa_info(mvdev, "resuming device\n"); + + down_write(&ndev->reslock); + mvdev->suspended = false; + resume_vqs(ndev); + register_link_notifier(ndev); + up_write(&ndev->reslock); + return 0; +} + static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group, unsigned int asid) { @@ -3285,6 +3446,7 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = { .get_vq_dma_dev = mlx5_get_vq_dma_dev, 
.free = mlx5_vdpa_free, .suspend = mlx5_vdpa_suspend, + .resume = mlx5_vdpa_resume, /* Op disabled if not supported. */ }; static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu) @@ -3560,6 +3722,8 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, if (err) goto err_mpfs; + INIT_LIST_HEAD(&mvdev->mr_list_head); + if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) { err = mlx5_vdpa_create_dma_mr(mvdev); if (err) @@ -3656,6 +3820,9 @@ static int mlx5v_probe(struct auxiliary_device *adev, if (!MLX5_CAP_DEV_VDPA_EMULATION(mdev, desc_group_mkey_supported)) mgtdev->vdpa_ops.get_vq_desc_group = NULL; + if (!MLX5_CAP_DEV_VDPA_EMULATION(mdev, freeze_to_rdy_supported)) + mgtdev->vdpa_ops.resume = NULL; + err = vdpa_mgmtdev_register(&mgtdev->mgtdev); if (err) goto reg_err; diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index a7612e0783b36a89a61d5482a305c668a748ddac..d0695680b282ec7812983a7bd37cd9729dfa0126 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -131,7 +131,7 @@ static void vdpa_release_dev(struct device *d) if (ops->free) ops->free(vdev); - ida_simple_remove(&vdpa_index_ida, vdev->index); + ida_free(&vdpa_index_ida, vdev->index); kfree(vdev->driver_override); kfree(vdev); } @@ -205,7 +205,7 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent, return vdev; err_name: - ida_simple_remove(&vdpa_index_ida, vdev->index); + ida_free(&vdpa_index_ida, vdev->index); err_ida: kfree(vdev); err: diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index 6bda6dbb48784b7047c467388575f9e35c8e2c0b..ceae52fd7586d019778cb7d1026942bea962315d 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -80,6 +80,16 @@ config VFIO_VIRQFD select EVENTFD default n +config VFIO_DEBUGFS + bool "Export VFIO internals in DebugFS" + depends on DEBUG_FS + help + Allows exposure of VFIO device internals. This option enables + the use of debugfs by VFIO drivers as required. 
The device can + cause the VFIO code create a top-level debug/vfio directory + during initialization, and then populate a subdirectory with + entries as required. + source "drivers/vfio/pci/Kconfig" source "drivers/vfio/platform/Kconfig" source "drivers/vfio/mdev/Kconfig" diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index 68c05705200fce8fc9824a8521bbe554e5c130f7..b2fc9fb499d8690cf7d75e32bdf9bbb02efdf9f7 100644 --- a/drivers/vfio/Makefile +++ b/drivers/vfio/Makefile @@ -7,6 +7,7 @@ vfio-$(CONFIG_VFIO_GROUP) += group.o vfio-$(CONFIG_IOMMUFD) += iommufd.o vfio-$(CONFIG_VFIO_CONTAINER) += container.o vfio-$(CONFIG_VFIO_VIRQFD) += virqfd.o +vfio-$(CONFIG_VFIO_DEBUGFS) += debugfs.o obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o diff --git a/drivers/vfio/debugfs.c b/drivers/vfio/debugfs.c new file mode 100644 index 0000000000000000000000000000000000000000..298bd866f15766b50e342511d8a83f0621cb4f55 --- /dev/null +++ b/drivers/vfio/debugfs.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2023, HiSilicon Ltd. 
+ */ + +#include <linux/device.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> +#include <linux/vfio.h> +#include "vfio.h" + +static struct dentry *vfio_debugfs_root; + +static int vfio_device_state_read(struct seq_file *seq, void *data) +{ + struct device *vf_dev = seq->private; + struct vfio_device *vdev = container_of(vf_dev, + struct vfio_device, device); + enum vfio_device_mig_state state; + int ret; + + BUILD_BUG_ON(VFIO_DEVICE_STATE_NR != + VFIO_DEVICE_STATE_PRE_COPY_P2P + 1); + + ret = vdev->mig_ops->migration_get_state(vdev, &state); + if (ret) + return -EINVAL; + + switch (state) { + case VFIO_DEVICE_STATE_ERROR: + seq_puts(seq, "ERROR\n"); + break; + case VFIO_DEVICE_STATE_STOP: + seq_puts(seq, "STOP\n"); + break; + case VFIO_DEVICE_STATE_RUNNING: + seq_puts(seq, "RUNNING\n"); + break; + case VFIO_DEVICE_STATE_STOP_COPY: + seq_puts(seq, "STOP_COPY\n"); + break; + case VFIO_DEVICE_STATE_RESUMING: + seq_puts(seq, "RESUMING\n"); + break; + case VFIO_DEVICE_STATE_RUNNING_P2P: + seq_puts(seq, "RUNNING_P2P\n"); + break; + case VFIO_DEVICE_STATE_PRE_COPY: + seq_puts(seq, "PRE_COPY\n"); + break; + case VFIO_DEVICE_STATE_PRE_COPY_P2P: + seq_puts(seq, "PRE_COPY_P2P\n"); + break; + default: + seq_puts(seq, "Invalid\n"); + } + + return 0; +} + +void vfio_device_debugfs_init(struct vfio_device *vdev) +{ + struct device *dev = &vdev->device; + + vdev->debug_root = debugfs_create_dir(dev_name(vdev->dev), + vfio_debugfs_root); + + if (vdev->mig_ops) { + struct dentry *vfio_dev_migration = NULL; + + vfio_dev_migration = debugfs_create_dir("migration", + vdev->debug_root); + debugfs_create_devm_seqfile(dev, "state", vfio_dev_migration, + vfio_device_state_read); + } +} + +void vfio_device_debugfs_exit(struct vfio_device *vdev) +{ + debugfs_remove_recursive(vdev->debug_root); +} + +void vfio_debugfs_create_root(void) +{ + vfio_debugfs_root = debugfs_create_dir("vfio", NULL); +} + +void vfio_debugfs_remove_root(void) +{ + debugfs_remove_recursive(vfio_debugfs_root); + vfio_debugfs_root = NULL; +} diff --git
a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig index 8125e5f37832c40adbf25a6868389c78639e42cc..18c397df566d8dbf4716a583bf45f33faa6c1f0a 100644 --- a/drivers/vfio/pci/Kconfig +++ b/drivers/vfio/pci/Kconfig @@ -65,4 +65,6 @@ source "drivers/vfio/pci/hisilicon/Kconfig" source "drivers/vfio/pci/pds/Kconfig" +source "drivers/vfio/pci/virtio/Kconfig" + endmenu diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile index 45167be462d8f601c2da3924fd6848ef6c059cf9..046139a4eca5b58b733bfc62174ac54ec745307d 100644 --- a/drivers/vfio/pci/Makefile +++ b/drivers/vfio/pci/Makefile @@ -13,3 +13,5 @@ obj-$(CONFIG_MLX5_VFIO_PCI) += mlx5/ obj-$(CONFIG_HISI_ACC_VFIO_PCI) += hisilicon/ obj-$(CONFIG_PDS_VFIO_PCI) += pds/ + +obj-$(CONFIG_VIRTIO_VFIO_PCI) += virtio/ diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c index b2f9778c8366ea8944f1c59be51f41d69570da19..4d27465c8f1a893352bae1498303a5da7e4ebb6d 100644 --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c @@ -694,6 +694,7 @@ static ssize_t hisi_acc_vf_resume_write(struct file *filp, const char __user *bu size_t len, loff_t *pos) { struct hisi_acc_vf_migration_file *migf = filp->private_data; + u8 *vf_data = (u8 *)&migf->vf_data; loff_t requested_length; ssize_t done = 0; int ret; @@ -715,7 +716,7 @@ static ssize_t hisi_acc_vf_resume_write(struct file *filp, const char __user *bu goto out_unlock; } - ret = copy_from_user(&migf->vf_data, buf, len); + ret = copy_from_user(vf_data + *pos, buf, len); if (ret) { done = -EFAULT; goto out_unlock; @@ -835,7 +836,9 @@ static ssize_t hisi_acc_vf_save_read(struct file *filp, char __user *buf, size_t len = min_t(size_t, migf->total_length - *pos, len); if (len) { - ret = copy_to_user(buf, &migf->vf_data, len); + u8 *vf_data = (u8 *)&migf->vf_data; + + ret = copy_to_user(buf, vf_data + *pos, len); if (ret) { done = -EFAULT; goto out_unlock; diff --git 
a/drivers/vfio/pci/pds/dirty.c b/drivers/vfio/pci/pds/dirty.c index c937aa6f39546da2d3f4443c0d35c9d984090f7f..8ddf4346fcd5d153ad24b7377edd7f412be28b47 100644 --- a/drivers/vfio/pci/pds/dirty.c +++ b/drivers/vfio/pci/pds/dirty.c @@ -70,7 +70,7 @@ out_free_region_info: kfree(region_info); } -static int pds_vfio_dirty_alloc_bitmaps(struct pds_vfio_dirty *dirty, +static int pds_vfio_dirty_alloc_bitmaps(struct pds_vfio_region *region, unsigned long bytes) { unsigned long *host_seq_bmp, *host_ack_bmp; @@ -85,47 +85,63 @@ static int pds_vfio_dirty_alloc_bitmaps(struct pds_vfio_dirty *dirty, return -ENOMEM; } - dirty->host_seq.bmp = host_seq_bmp; - dirty->host_ack.bmp = host_ack_bmp; + region->host_seq = host_seq_bmp; + region->host_ack = host_ack_bmp; + region->bmp_bytes = bytes; return 0; } static void pds_vfio_dirty_free_bitmaps(struct pds_vfio_dirty *dirty) { - vfree(dirty->host_seq.bmp); - vfree(dirty->host_ack.bmp); - dirty->host_seq.bmp = NULL; - dirty->host_ack.bmp = NULL; + if (!dirty->regions) + return; + + for (int i = 0; i < dirty->num_regions; i++) { + struct pds_vfio_region *region = &dirty->regions[i]; + + vfree(region->host_seq); + vfree(region->host_ack); + region->host_seq = NULL; + region->host_ack = NULL; + region->bmp_bytes = 0; + } } static void __pds_vfio_dirty_free_sgl(struct pds_vfio_pci_device *pds_vfio, - struct pds_vfio_bmp_info *bmp_info) + struct pds_vfio_region *region) { struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev; struct device *pdsc_dev = &pci_physfn(pdev)->dev; - dma_unmap_single(pdsc_dev, bmp_info->sgl_addr, - bmp_info->num_sge * sizeof(struct pds_lm_sg_elem), + dma_unmap_single(pdsc_dev, region->sgl_addr, + region->num_sge * sizeof(struct pds_lm_sg_elem), DMA_BIDIRECTIONAL); - kfree(bmp_info->sgl); + kfree(region->sgl); - bmp_info->num_sge = 0; - bmp_info->sgl = NULL; - bmp_info->sgl_addr = 0; + region->num_sge = 0; + region->sgl = NULL; + region->sgl_addr = 0; } static void pds_vfio_dirty_free_sgl(struct pds_vfio_pci_device 
*pds_vfio) { - if (pds_vfio->dirty.host_seq.sgl) - __pds_vfio_dirty_free_sgl(pds_vfio, &pds_vfio->dirty.host_seq); - if (pds_vfio->dirty.host_ack.sgl) - __pds_vfio_dirty_free_sgl(pds_vfio, &pds_vfio->dirty.host_ack); + struct pds_vfio_dirty *dirty = &pds_vfio->dirty; + + if (!dirty->regions) + return; + + for (int i = 0; i < dirty->num_regions; i++) { + struct pds_vfio_region *region = &dirty->regions[i]; + + if (region->sgl) + __pds_vfio_dirty_free_sgl(pds_vfio, region); + } } -static int __pds_vfio_dirty_alloc_sgl(struct pds_vfio_pci_device *pds_vfio, - struct pds_vfio_bmp_info *bmp_info, - u32 page_count) +static int pds_vfio_dirty_alloc_sgl(struct pds_vfio_pci_device *pds_vfio, + struct pds_vfio_region *region, + u32 page_count) { struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev; struct device *pdsc_dev = &pci_physfn(pdev)->dev; @@ -147,32 +163,81 @@ static int __pds_vfio_dirty_alloc_sgl(struct pds_vfio_pci_device *pds_vfio, return -EIO; } - bmp_info->sgl = sgl; - bmp_info->num_sge = max_sge; - bmp_info->sgl_addr = sgl_addr; + region->sgl = sgl; + region->num_sge = max_sge; + region->sgl_addr = sgl_addr; return 0; } -static int pds_vfio_dirty_alloc_sgl(struct pds_vfio_pci_device *pds_vfio, - u32 page_count) +static void pds_vfio_dirty_free_regions(struct pds_vfio_dirty *dirty) { + vfree(dirty->regions); + dirty->regions = NULL; + dirty->num_regions = 0; +} + +static int pds_vfio_dirty_alloc_regions(struct pds_vfio_pci_device *pds_vfio, + struct pds_lm_dirty_region_info *region_info, + u64 region_page_size, u8 num_regions) +{ + struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev; struct pds_vfio_dirty *dirty = &pds_vfio->dirty; + u32 dev_bmp_offset_byte = 0; int err; - err = __pds_vfio_dirty_alloc_sgl(pds_vfio, &dirty->host_seq, - page_count); - if (err) - return err; + dirty->regions = vcalloc(num_regions, sizeof(struct pds_vfio_region)); + if (!dirty->regions) + return -ENOMEM; + dirty->num_regions = num_regions; + + for (int i = 0; i < num_regions; i++) { + 
struct pds_lm_dirty_region_info *ri = &region_info[i]; + struct pds_vfio_region *region = &dirty->regions[i]; + u64 region_size, region_start; + u32 page_count; + + /* page_count might be adjusted by the device */ + page_count = le32_to_cpu(ri->page_count); + region_start = le64_to_cpu(ri->dma_base); + region_size = page_count * region_page_size; + + err = pds_vfio_dirty_alloc_bitmaps(region, + page_count / BITS_PER_BYTE); + if (err) { + dev_err(&pdev->dev, "Failed to alloc dirty bitmaps: %pe\n", + ERR_PTR(err)); + goto out_free_regions; + } - err = __pds_vfio_dirty_alloc_sgl(pds_vfio, &dirty->host_ack, - page_count); - if (err) { - __pds_vfio_dirty_free_sgl(pds_vfio, &dirty->host_seq); - return err; + err = pds_vfio_dirty_alloc_sgl(pds_vfio, region, page_count); + if (err) { + dev_err(&pdev->dev, "Failed to alloc dirty sg lists: %pe\n", + ERR_PTR(err)); + goto out_free_regions; + } + + region->size = region_size; + region->start = region_start; + region->page_size = region_page_size; + region->dev_bmp_offset_start_byte = dev_bmp_offset_byte; + + dev_bmp_offset_byte += page_count / BITS_PER_BYTE; + if (dev_bmp_offset_byte % BITS_PER_BYTE) { + dev_err(&pdev->dev, "Device bitmap offset is mis-aligned\n"); + err = -EINVAL; + goto out_free_regions; + } } return 0; + +out_free_regions: + pds_vfio_dirty_free_bitmaps(dirty); + pds_vfio_dirty_free_sgl(pds_vfio); + pds_vfio_dirty_free_regions(dirty); + + return err; } static int pds_vfio_dirty_enable(struct pds_vfio_pci_device *pds_vfio, @@ -181,16 +246,14 @@ static int pds_vfio_dirty_enable(struct pds_vfio_pci_device *pds_vfio, { struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev; struct device *pdsc_dev = &pci_physfn(pdev)->dev; - struct pds_vfio_dirty *dirty = &pds_vfio->dirty; - u64 region_start, region_size, region_page_size; struct pds_lm_dirty_region_info *region_info; struct interval_tree_node *node = NULL; + u64 region_page_size = *page_size; u8 max_regions = 0, num_regions; dma_addr_t regions_dma = 0; u32 num_ranges
= nnodes; - u32 page_count; - u16 len; int err; + u16 len; dev_dbg(&pdev->dev, "vf%u: Start dirty page tracking\n", pds_vfio->vf_id); @@ -217,39 +280,38 @@ static int pds_vfio_dirty_enable(struct pds_vfio_pci_device *pds_vfio, return -EOPNOTSUPP; } - /* - * Only support 1 region for now. If there are any large gaps in the - * VM's address regions, then this would be a waste of memory as we are - * generating 2 bitmaps (ack/seq) from the min address to the max - * address of the VM's address regions. In the future, if we support - * more than one region in the device/driver we can split the bitmaps - * on the largest address region gaps. We can do this split up to the - * max_regions times returned from the dirty_status command. - */ - max_regions = 1; if (num_ranges > max_regions) { vfio_combine_iova_ranges(ranges, nnodes, max_regions); num_ranges = max_regions; } + region_info = kcalloc(num_ranges, sizeof(*region_info), GFP_KERNEL); + if (!region_info) + return -ENOMEM; + len = num_ranges * sizeof(*region_info); + node = interval_tree_iter_first(ranges, 0, ULONG_MAX); if (!node) return -EINVAL; + for (int i = 0; i < num_ranges; i++) { + struct pds_lm_dirty_region_info *ri = &region_info[i]; + u64 region_size = node->last - node->start + 1; + u64 region_start = node->start; + u32 page_count; - region_size = node->last - node->start + 1; - region_start = node->start; - region_page_size = *page_size; + page_count = DIV_ROUND_UP(region_size, region_page_size); - len = sizeof(*region_info); - region_info = kzalloc(len, GFP_KERNEL); - if (!region_info) - return -ENOMEM; + ri->dma_base = cpu_to_le64(region_start); + ri->page_count = cpu_to_le32(page_count); + ri->page_size_log2 = ilog2(region_page_size); - page_count = DIV_ROUND_UP(region_size, region_page_size); + dev_dbg(&pdev->dev, + "region_info[%d]: region_start 0x%llx region_end 0x%lx region_size 0x%llx page_count %u page_size %llu\n", + i, region_start, node->last, region_size, page_count, + region_page_size); -
region_info->dma_base = cpu_to_le64(region_start); - region_info->page_count = cpu_to_le32(page_count); - region_info->page_size_log2 = ilog2(region_page_size); + node = interval_tree_iter_next(node, 0, ULONG_MAX); + } regions_dma = dma_map_single(pdsc_dev, (void *)region_info, len, DMA_BIDIRECTIONAL); @@ -258,39 +320,20 @@ static int pds_vfio_dirty_enable(struct pds_vfio_pci_device *pds_vfio, goto out_free_region_info; } - err = pds_vfio_dirty_enable_cmd(pds_vfio, regions_dma, max_regions); + err = pds_vfio_dirty_enable_cmd(pds_vfio, regions_dma, num_ranges); dma_unmap_single(pdsc_dev, regions_dma, len, DMA_BIDIRECTIONAL); if (err) goto out_free_region_info; - /* - * page_count might be adjusted by the device, - * update it before freeing region_info DMA - */ - page_count = le32_to_cpu(region_info->page_count); - - dev_dbg(&pdev->dev, - "region_info: regions_dma 0x%llx dma_base 0x%llx page_count %u page_size_log2 %u\n", - regions_dma, region_start, page_count, - (u8)ilog2(region_page_size)); - - err = pds_vfio_dirty_alloc_bitmaps(dirty, page_count / BITS_PER_BYTE); - if (err) { - dev_err(&pdev->dev, "Failed to alloc dirty bitmaps: %pe\n", - ERR_PTR(err)); - goto out_free_region_info; - } - - err = pds_vfio_dirty_alloc_sgl(pds_vfio, page_count); + err = pds_vfio_dirty_alloc_regions(pds_vfio, region_info, + region_page_size, num_ranges); if (err) { - dev_err(&pdev->dev, "Failed to alloc dirty sg lists: %pe\n", - ERR_PTR(err)); - goto out_free_bitmaps; + dev_err(&pdev->dev, + "Failed to allocate %d regions for tracking dirty regions: %pe\n", + num_regions, ERR_PTR(err)); + goto out_dirty_disable; } - dirty->region_start = region_start; - dirty->region_size = region_size; - dirty->region_page_size = region_page_size; pds_vfio_dirty_set_enabled(pds_vfio); pds_vfio_print_guest_region_info(pds_vfio, max_regions); @@ -299,8 +342,8 @@ static int pds_vfio_dirty_enable(struct pds_vfio_pci_device *pds_vfio, return 0; -out_free_bitmaps: - pds_vfio_dirty_free_bitmaps(dirty); 
+out_dirty_disable: + pds_vfio_dirty_disable_cmd(pds_vfio); out_free_region_info: kfree(region_info); return err; @@ -314,6 +357,7 @@ void pds_vfio_dirty_disable(struct pds_vfio_pci_device *pds_vfio, bool send_cmd) pds_vfio_dirty_disable_cmd(pds_vfio); pds_vfio_dirty_free_sgl(pds_vfio); pds_vfio_dirty_free_bitmaps(&pds_vfio->dirty); + pds_vfio_dirty_free_regions(&pds_vfio->dirty); } if (send_cmd) @@ -321,8 +365,9 @@ void pds_vfio_dirty_disable(struct pds_vfio_pci_device *pds_vfio, bool send_cmd) } static int pds_vfio_dirty_seq_ack(struct pds_vfio_pci_device *pds_vfio, - struct pds_vfio_bmp_info *bmp_info, - u32 offset, u32 bmp_bytes, bool read_seq) + struct pds_vfio_region *region, + unsigned long *seq_ack_bmp, u32 offset, + u32 bmp_bytes, bool read_seq) { const char *bmp_type_str = read_seq ? "read_seq" : "write_ack"; u8 dma_dir = read_seq ? DMA_FROM_DEVICE : DMA_TO_DEVICE; @@ -339,7 +384,7 @@ static int pds_vfio_dirty_seq_ack(struct pds_vfio_pci_device *pds_vfio, int err; int i; - bmp = (void *)((u64)bmp_info->bmp + offset); + bmp = (void *)((u64)seq_ack_bmp + offset); page_offset = offset_in_page(bmp); bmp -= page_offset; @@ -375,7 +420,7 @@ static int pds_vfio_dirty_seq_ack(struct pds_vfio_pci_device *pds_vfio, goto out_free_sg_table; for_each_sgtable_dma_sg(&sg_table, sg, i) { - struct pds_lm_sg_elem *sg_elem = &bmp_info->sgl[i]; + struct pds_lm_sg_elem *sg_elem = &region->sgl[i]; sg_elem->addr = cpu_to_le64(sg_dma_address(sg)); sg_elem->len = cpu_to_le32(sg_dma_len(sg)); @@ -383,15 +428,16 @@ static int pds_vfio_dirty_seq_ack(struct pds_vfio_pci_device *pds_vfio, num_sge = sg_table.nents; size = num_sge * sizeof(struct pds_lm_sg_elem); - dma_sync_single_for_device(pdsc_dev, bmp_info->sgl_addr, size, dma_dir); - err = pds_vfio_dirty_seq_ack_cmd(pds_vfio, bmp_info->sgl_addr, num_sge, + offset += region->dev_bmp_offset_start_byte; + dma_sync_single_for_device(pdsc_dev, region->sgl_addr, size, dma_dir); + err = pds_vfio_dirty_seq_ack_cmd(pds_vfio, region->sgl_addr,
num_sge, offset, bmp_bytes, read_seq); if (err) dev_err(&pdev->dev, "Dirty bitmap %s failed offset %u bmp_bytes %u num_sge %u DMA 0x%llx: %pe\n", bmp_type_str, offset, bmp_bytes, - num_sge, bmp_info->sgl_addr, ERR_PTR(err)); - dma_sync_single_for_cpu(pdsc_dev, bmp_info->sgl_addr, size, dma_dir); + num_sge, region->sgl_addr, ERR_PTR(err)); + dma_sync_single_for_cpu(pdsc_dev, region->sgl_addr, size, dma_dir); dma_unmap_sgtable(pdsc_dev, &sg_table, dma_dir, 0); out_free_sg_table: @@ -403,32 +449,36 @@ out_free_pages: } static int pds_vfio_dirty_write_ack(struct pds_vfio_pci_device *pds_vfio, + struct pds_vfio_region *region, u32 offset, u32 len) { - return pds_vfio_dirty_seq_ack(pds_vfio, &pds_vfio->dirty.host_ack, + + return pds_vfio_dirty_seq_ack(pds_vfio, region, region->host_ack, offset, len, WRITE_ACK); } static int pds_vfio_dirty_read_seq(struct pds_vfio_pci_device *pds_vfio, + struct pds_vfio_region *region, u32 offset, u32 len) { - return pds_vfio_dirty_seq_ack(pds_vfio, &pds_vfio->dirty.host_seq, + return pds_vfio_dirty_seq_ack(pds_vfio, region, region->host_seq, offset, len, READ_SEQ); } static int pds_vfio_dirty_process_bitmaps(struct pds_vfio_pci_device *pds_vfio, + struct pds_vfio_region *region, struct iova_bitmap *dirty_bitmap, u32 bmp_offset, u32 len_bytes) { - u64 page_size = pds_vfio->dirty.region_page_size; - u64 region_start = pds_vfio->dirty.region_start; + u64 page_size = region->page_size; + u64 region_start = region->start; u32 bmp_offset_bit; __le64 *seq, *ack; int dword_count; dword_count = len_bytes / sizeof(u64); - seq = (__le64 *)((u64)pds_vfio->dirty.host_seq.bmp + bmp_offset); - ack = (__le64 *)((u64)pds_vfio->dirty.host_ack.bmp + bmp_offset); + seq = (__le64 *)((u64)region->host_seq + bmp_offset); + ack = (__le64 *)((u64)region->host_ack + bmp_offset); bmp_offset_bit = bmp_offset * 8; for (int i = 0; i < dword_count; i++) { @@ -451,12 +501,28 @@ static int pds_vfio_dirty_process_bitmaps(struct pds_vfio_pci_device *pds_vfio, return 0; } 
+static struct pds_vfio_region * +pds_vfio_get_region(struct pds_vfio_pci_device *pds_vfio, unsigned long iova) +{ + struct pds_vfio_dirty *dirty = &pds_vfio->dirty; + + for (int i = 0; i < dirty->num_regions; i++) { + struct pds_vfio_region *region = &dirty->regions[i]; + + if (iova >= region->start && + iova < (region->start + region->size)) + return region; + } + + return NULL; +} + static int pds_vfio_dirty_sync(struct pds_vfio_pci_device *pds_vfio, struct iova_bitmap *dirty_bitmap, unsigned long iova, unsigned long length) { struct device *dev = &pds_vfio->vfio_coredev.pdev->dev; - struct pds_vfio_dirty *dirty = &pds_vfio->dirty; + struct pds_vfio_region *region; u64 bmp_offset, bmp_bytes; u64 bitmap_size, pages; int err; @@ -469,26 +535,31 @@ static int pds_vfio_dirty_sync(struct pds_vfio_pci_device *pds_vfio, return -EINVAL; } - pages = DIV_ROUND_UP(length, pds_vfio->dirty.region_page_size); + region = pds_vfio_get_region(pds_vfio, iova); + if (!region) { + dev_err(dev, "vf%u: Failed to find region that contains iova 0x%lx length 0x%lx\n", + pds_vfio->vf_id, iova, length); + return -EINVAL; + } + + pages = DIV_ROUND_UP(length, region->page_size); bitmap_size = round_up(pages, sizeof(u64) * BITS_PER_BYTE) / BITS_PER_BYTE; dev_dbg(dev, "vf%u: iova 0x%lx length %lu page_size %llu pages %llu bitmap_size %llu\n", - pds_vfio->vf_id, iova, length, pds_vfio->dirty.region_page_size, + pds_vfio->vf_id, iova, length, region->page_size, pages, bitmap_size); - if (!length || ((dirty->region_start + iova + length) > - (dirty->region_start + dirty->region_size))) { + if (!length || ((iova - region->start + length) > region->size)) { dev_err(dev, "Invalid iova 0x%lx and/or length 0x%lx to sync\n", iova, length); return -EINVAL; } /* bitmap is modified in 64 bit chunks */ - bmp_bytes = ALIGN(DIV_ROUND_UP(length / dirty->region_page_size, - sizeof(u64)), - sizeof(u64)); + bmp_bytes = ALIGN(DIV_ROUND_UP(length / region->page_size, + sizeof(u64)), sizeof(u64)); if (bmp_bytes != 
bitmap_size) { dev_err(dev, "Calculated bitmap bytes %llu not equal to bitmap size %llu\n", @@ -496,22 +567,30 @@ static int pds_vfio_dirty_sync(struct pds_vfio_pci_device *pds_vfio, return -EINVAL; } - bmp_offset = DIV_ROUND_UP(iova / dirty->region_page_size, sizeof(u64)); + if (bmp_bytes > region->bmp_bytes) { + dev_err(dev, + "Calculated bitmap bytes %llu larger than region's cached bmp_bytes %llu\n", + bmp_bytes, region->bmp_bytes); + return -EINVAL; + } + + bmp_offset = DIV_ROUND_UP((iova - region->start) / + region->page_size, sizeof(u64)); dev_dbg(dev, "Syncing dirty bitmap, iova 0x%lx length 0x%lx, bmp_offset %llu bmp_bytes %llu\n", iova, length, bmp_offset, bmp_bytes); - err = pds_vfio_dirty_read_seq(pds_vfio, bmp_offset, bmp_bytes); + err = pds_vfio_dirty_read_seq(pds_vfio, region, bmp_offset, bmp_bytes); if (err) return err; - err = pds_vfio_dirty_process_bitmaps(pds_vfio, dirty_bitmap, bmp_offset, - bmp_bytes); + err = pds_vfio_dirty_process_bitmaps(pds_vfio, region, dirty_bitmap, + bmp_offset, bmp_bytes); if (err) return err; - err = pds_vfio_dirty_write_ack(pds_vfio, bmp_offset, bmp_bytes); + err = pds_vfio_dirty_write_ack(pds_vfio, region, bmp_offset, bmp_bytes); if (err) return err; diff --git a/drivers/vfio/pci/pds/dirty.h b/drivers/vfio/pci/pds/dirty.h index f78da25d75ca9c37318e0793c55b791bcc74432d..c8e23018b80186a41bca122d88e1d3f52e825d05 100644 --- a/drivers/vfio/pci/pds/dirty.h +++ b/drivers/vfio/pci/pds/dirty.h @@ -4,20 +4,22 @@ #ifndef _DIRTY_H_ #define _DIRTY_H_ -struct pds_vfio_bmp_info { - unsigned long *bmp; - u32 bmp_bytes; +struct pds_vfio_region { + unsigned long *host_seq; + unsigned long *host_ack; + u64 bmp_bytes; + u64 size; + u64 start; + u64 page_size; struct pds_lm_sg_elem *sgl; dma_addr_t sgl_addr; + u32 dev_bmp_offset_start_byte; u16 num_sge; }; struct pds_vfio_dirty { - struct pds_vfio_bmp_info host_seq; - struct pds_vfio_bmp_info host_ack; - u64 region_size; - u64 region_start; - u64 region_page_size; + struct 
pds_vfio_region *regions; + u8 num_regions; bool is_enabled; }; diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c index e27de61ac9fe75f5818dc8d7386270c592c05a07..07fea08ea8a21340cacef113b238adea5dc8b59d 100644 --- a/drivers/vfio/pci/vfio_pci_rdwr.c +++ b/drivers/vfio/pci/vfio_pci_rdwr.c @@ -38,7 +38,7 @@ #define vfio_iowrite8 iowrite8 #define VFIO_IOWRITE(size) \ -static int vfio_pci_iowrite##size(struct vfio_pci_core_device *vdev, \ +int vfio_pci_core_iowrite##size(struct vfio_pci_core_device *vdev, \ bool test_mem, u##size val, void __iomem *io) \ { \ if (test_mem) { \ @@ -55,7 +55,8 @@ static int vfio_pci_iowrite##size(struct vfio_pci_core_device *vdev, \ up_read(&vdev->memory_lock); \ \ return 0; \ -} +} \ +EXPORT_SYMBOL_GPL(vfio_pci_core_iowrite##size); VFIO_IOWRITE(8) VFIO_IOWRITE(16) @@ -65,7 +66,7 @@ VFIO_IOWRITE(64) #endif #define VFIO_IOREAD(size) \ -static int vfio_pci_ioread##size(struct vfio_pci_core_device *vdev, \ +int vfio_pci_core_ioread##size(struct vfio_pci_core_device *vdev, \ bool test_mem, u##size *val, void __iomem *io) \ { \ if (test_mem) { \ @@ -82,7 +83,8 @@ static int vfio_pci_ioread##size(struct vfio_pci_core_device *vdev, \ up_read(&vdev->memory_lock); \ \ return 0; \ -} +} \ +EXPORT_SYMBOL_GPL(vfio_pci_core_ioread##size); VFIO_IOREAD(8) VFIO_IOREAD(16) @@ -119,13 +121,13 @@ static ssize_t do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem, if (copy_from_user(&val, buf, 4)) return -EFAULT; - ret = vfio_pci_iowrite32(vdev, test_mem, - val, io + off); + ret = vfio_pci_core_iowrite32(vdev, test_mem, + val, io + off); if (ret) return ret; } else { - ret = vfio_pci_ioread32(vdev, test_mem, - &val, io + off); + ret = vfio_pci_core_ioread32(vdev, test_mem, + &val, io + off); if (ret) return ret; @@ -141,13 +143,13 @@ static ssize_t do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem, if (copy_from_user(&val, buf, 2)) return -EFAULT; - ret = vfio_pci_iowrite16(vdev, test_mem, - val, io + off); + ret = 
vfio_pci_core_iowrite16(vdev, test_mem, + val, io + off); if (ret) return ret; } else { - ret = vfio_pci_ioread16(vdev, test_mem, - &val, io + off); + ret = vfio_pci_core_ioread16(vdev, test_mem, + &val, io + off); if (ret) return ret; @@ -163,13 +165,13 @@ static ssize_t do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem, if (copy_from_user(&val, buf, 1)) return -EFAULT; - ret = vfio_pci_iowrite8(vdev, test_mem, - val, io + off); + ret = vfio_pci_core_iowrite8(vdev, test_mem, + val, io + off); if (ret) return ret; } else { - ret = vfio_pci_ioread8(vdev, test_mem, - &val, io + off); + ret = vfio_pci_core_ioread8(vdev, test_mem, + &val, io + off); if (ret) return ret; @@ -200,7 +202,7 @@ static ssize_t do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem, return done; } -static int vfio_pci_setup_barmap(struct vfio_pci_core_device *vdev, int bar) +int vfio_pci_core_setup_barmap(struct vfio_pci_core_device *vdev, int bar) { struct pci_dev *pdev = vdev->pdev; int ret; @@ -223,6 +225,7 @@ static int vfio_pci_setup_barmap(struct vfio_pci_core_device *vdev, int bar) return 0; } +EXPORT_SYMBOL_GPL(vfio_pci_core_setup_barmap); ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf, size_t count, loff_t *ppos, bool iswrite) @@ -262,7 +265,7 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf, } x_end = end; } else { - int ret = vfio_pci_setup_barmap(vdev, bar); + int ret = vfio_pci_core_setup_barmap(vdev, bar); if (ret) { done = ret; goto out; @@ -363,21 +366,21 @@ static void vfio_pci_ioeventfd_do_write(struct vfio_pci_ioeventfd *ioeventfd, { switch (ioeventfd->count) { case 1: - vfio_pci_iowrite8(ioeventfd->vdev, test_mem, - ioeventfd->data, ioeventfd->addr); + vfio_pci_core_iowrite8(ioeventfd->vdev, test_mem, + ioeventfd->data, ioeventfd->addr); break; case 2: - vfio_pci_iowrite16(ioeventfd->vdev, test_mem, - ioeventfd->data, ioeventfd->addr); + vfio_pci_core_iowrite16(ioeventfd->vdev, test_mem, + 
ioeventfd->data, ioeventfd->addr); break; case 4: - vfio_pci_iowrite32(ioeventfd->vdev, test_mem, - ioeventfd->data, ioeventfd->addr); + vfio_pci_core_iowrite32(ioeventfd->vdev, test_mem, + ioeventfd->data, ioeventfd->addr); break; #ifdef iowrite64 case 8: - vfio_pci_iowrite64(ioeventfd->vdev, test_mem, - ioeventfd->data, ioeventfd->addr); + vfio_pci_core_iowrite64(ioeventfd->vdev, test_mem, + ioeventfd->data, ioeventfd->addr); break; #endif } @@ -438,7 +441,7 @@ int vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset, return -EINVAL; #endif - ret = vfio_pci_setup_barmap(vdev, bar); + ret = vfio_pci_core_setup_barmap(vdev, bar); if (ret) return ret; diff --git a/drivers/vfio/pci/virtio/Kconfig b/drivers/vfio/pci/virtio/Kconfig new file mode 100644 index 0000000000000000000000000000000000000000..bd80eca4a196c2da6eb8d59f071f74e8ba416de2 --- /dev/null +++ b/drivers/vfio/pci/virtio/Kconfig @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0-only +config VIRTIO_VFIO_PCI + tristate "VFIO support for VIRTIO NET PCI devices" + depends on VIRTIO_PCI && VIRTIO_PCI_ADMIN_LEGACY + select VFIO_PCI_CORE + help + This provides support for exposing VIRTIO NET VF devices which support + legacy IO access, using the VFIO framework that can work with a legacy + virtio driver in the guest. + Based on PCIe spec, VFs do not support I/O Space. + As of that this driver emulates I/O BAR in software to let a VF be + seen as a transitional device by its users and let it work with + a legacy driver. + + If you don't know what to do here, say N. 
diff --git a/drivers/vfio/pci/virtio/Makefile b/drivers/vfio/pci/virtio/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..7171105baf330013718ebcc9a3fd3593233f6dab --- /dev/null +++ b/drivers/vfio/pci/virtio/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_VIRTIO_VFIO_PCI) += virtio-vfio-pci.o +virtio-vfio-pci-y := main.o diff --git a/drivers/vfio/pci/virtio/main.c b/drivers/vfio/pci/virtio/main.c new file mode 100644 index 0000000000000000000000000000000000000000..d5af683837d345eaf7afcb860e3f7774379f9474 --- /dev/null +++ b/drivers/vfio/pci/virtio/main.c @@ -0,0 +1,576 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct virtiovf_pci_core_device { + struct vfio_pci_core_device core_device; + u8 *bar0_virtual_buf; + /* synchronize access to the virtual buf */ + struct mutex bar_mutex; + void __iomem *notify_addr; + u64 notify_offset; + __le32 pci_base_addr_0; + __le16 pci_cmd; + u8 bar0_virtual_buf_size; + u8 notify_bar; +}; + +static int +virtiovf_issue_legacy_rw_cmd(struct virtiovf_pci_core_device *virtvdev, + loff_t pos, char __user *buf, + size_t count, bool read) +{ + bool msix_enabled = + (virtvdev->core_device.irq_type == VFIO_PCI_MSIX_IRQ_INDEX); + struct pci_dev *pdev = virtvdev->core_device.pdev; + u8 *bar0_buf = virtvdev->bar0_virtual_buf; + bool common; + u8 offset; + int ret; + + common = pos < VIRTIO_PCI_CONFIG_OFF(msix_enabled); + /* offset within the relevant configuration area */ + offset = common ? 
pos : pos - VIRTIO_PCI_CONFIG_OFF(msix_enabled); + mutex_lock(&virtvdev->bar_mutex); + if (read) { + if (common) + ret = virtio_pci_admin_legacy_common_io_read(pdev, offset, + count, bar0_buf + pos); + else + ret = virtio_pci_admin_legacy_device_io_read(pdev, offset, + count, bar0_buf + pos); + if (ret) + goto out; + if (copy_to_user(buf, bar0_buf + pos, count)) + ret = -EFAULT; + } else { + if (copy_from_user(bar0_buf + pos, buf, count)) { + ret = -EFAULT; + goto out; + } + + if (common) + ret = virtio_pci_admin_legacy_common_io_write(pdev, offset, + count, bar0_buf + pos); + else + ret = virtio_pci_admin_legacy_device_io_write(pdev, offset, + count, bar0_buf + pos); + } +out: + mutex_unlock(&virtvdev->bar_mutex); + return ret; +} + +static int +virtiovf_pci_bar0_rw(struct virtiovf_pci_core_device *virtvdev, + loff_t pos, char __user *buf, + size_t count, bool read) +{ + struct vfio_pci_core_device *core_device = &virtvdev->core_device; + struct pci_dev *pdev = core_device->pdev; + u16 queue_notify; + int ret; + + if (!(le16_to_cpu(virtvdev->pci_cmd) & PCI_COMMAND_IO)) + return -EIO; + + if (pos + count > virtvdev->bar0_virtual_buf_size) + return -EINVAL; + + ret = pm_runtime_resume_and_get(&pdev->dev); + if (ret) { + pci_info_ratelimited(pdev, "runtime resume failed %d\n", ret); + return -EIO; + } + + switch (pos) { + case VIRTIO_PCI_QUEUE_NOTIFY: + if (count != sizeof(queue_notify)) { + ret = -EINVAL; + goto end; + } + if (read) { + ret = vfio_pci_core_ioread16(core_device, true, &queue_notify, + virtvdev->notify_addr); + if (ret) + goto end; + if (copy_to_user(buf, &queue_notify, + sizeof(queue_notify))) { + ret = -EFAULT; + goto end; + } + } else { + if (copy_from_user(&queue_notify, buf, count)) { + ret = -EFAULT; + goto end; + } + ret = vfio_pci_core_iowrite16(core_device, true, queue_notify, + virtvdev->notify_addr); + } + break; + default: + ret = virtiovf_issue_legacy_rw_cmd(virtvdev, pos, buf, count, + read); + } + +end: + pm_runtime_put(&pdev->dev); + 
return ret ? ret : count; +} + +static bool range_intersect_range(loff_t range1_start, size_t count1, + loff_t range2_start, size_t count2, + loff_t *start_offset, + size_t *intersect_count, + size_t *register_offset) +{ + if (range1_start <= range2_start && + range1_start + count1 > range2_start) { + *start_offset = range2_start - range1_start; + *intersect_count = min_t(size_t, count2, + range1_start + count1 - range2_start); + *register_offset = 0; + return true; + } + + if (range1_start > range2_start && + range1_start < range2_start + count2) { + *start_offset = 0; + *intersect_count = min_t(size_t, count1, + range2_start + count2 - range1_start); + *register_offset = range1_start - range2_start; + return true; + } + + return false; +} + +static ssize_t virtiovf_pci_read_config(struct vfio_device *core_vdev, + char __user *buf, size_t count, + loff_t *ppos) +{ + struct virtiovf_pci_core_device *virtvdev = container_of( + core_vdev, struct virtiovf_pci_core_device, core_device.vdev); + loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; + size_t register_offset; + loff_t copy_offset; + size_t copy_count; + __le32 val32; + __le16 val16; + u8 val8; + int ret; + + ret = vfio_pci_core_read(core_vdev, buf, count, ppos); + if (ret < 0) + return ret; + + if (range_intersect_range(pos, count, PCI_DEVICE_ID, sizeof(val16), + &copy_offset, &copy_count, &register_offset)) { + val16 = cpu_to_le16(VIRTIO_TRANS_ID_NET); + if (copy_to_user(buf + copy_offset, (void *)&val16 + register_offset, copy_count)) + return -EFAULT; + } + + if ((le16_to_cpu(virtvdev->pci_cmd) & PCI_COMMAND_IO) && + range_intersect_range(pos, count, PCI_COMMAND, sizeof(val16), + &copy_offset, &copy_count, &register_offset)) { + if (copy_from_user((void *)&val16 + register_offset, buf + copy_offset, + copy_count)) + return -EFAULT; + val16 |= cpu_to_le16(PCI_COMMAND_IO); + if (copy_to_user(buf + copy_offset, (void *)&val16 + register_offset, + copy_count)) + return -EFAULT; + } + + if (range_intersect_range(pos, count, PCI_REVISION_ID,
sizeof(val8), + &copy_offset, &copy_count, &register_offset)) { + /* Transitional needs to have revision 0 */ + val8 = 0; + if (copy_to_user(buf + copy_offset, &val8, copy_count)) + return -EFAULT; + } + + if (range_intersect_range(pos, count, PCI_BASE_ADDRESS_0, sizeof(val32), + &copy_offset, &copy_count, &register_offset)) { + u32 bar_mask = ~(virtvdev->bar0_virtual_buf_size - 1); + u32 pci_base_addr_0 = le32_to_cpu(virtvdev->pci_base_addr_0); + + val32 = cpu_to_le32((pci_base_addr_0 & bar_mask) | PCI_BASE_ADDRESS_SPACE_IO); + if (copy_to_user(buf + copy_offset, (void *)&val32 + register_offset, copy_count)) + return -EFAULT; + } + + if (range_intersect_range(pos, count, PCI_SUBSYSTEM_ID, sizeof(val16), + &copy_offset, &copy_count, &register_offset)) { + /* + * Transitional devices use the PCI subsystem device id as + * virtio device id, same as legacy driver always did. + */ + val16 = cpu_to_le16(VIRTIO_ID_NET); + if (copy_to_user(buf + copy_offset, (void *)&val16 + register_offset, + copy_count)) + return -EFAULT; + } + + if (range_intersect_range(pos, count, PCI_SUBSYSTEM_VENDOR_ID, sizeof(val16), + &copy_offset, &copy_count, &register_offset)) { + val16 = cpu_to_le16(PCI_VENDOR_ID_REDHAT_QUMRANET); + if (copy_to_user(buf + copy_offset, (void *)&val16 + register_offset, + copy_count)) + return -EFAULT; + } + + return count; +} + +static ssize_t +virtiovf_pci_core_read(struct vfio_device *core_vdev, char __user *buf, + size_t count, loff_t *ppos) +{ + struct virtiovf_pci_core_device *virtvdev = container_of( + core_vdev, struct virtiovf_pci_core_device, core_device.vdev); + unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos); + loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; + + if (!count) + return 0; + + if (index == VFIO_PCI_CONFIG_REGION_INDEX) + return virtiovf_pci_read_config(core_vdev, buf, count, ppos); + + if (index == VFIO_PCI_BAR0_REGION_INDEX) + return virtiovf_pci_bar0_rw(virtvdev, pos, buf, count, true); + + return vfio_pci_core_read(core_vdev, buf, count, ppos); +} + +static ssize_t
virtiovf_pci_write_config(struct vfio_device *core_vdev, + const char __user *buf, size_t count, + loff_t *ppos) +{ + struct virtiovf_pci_core_device *virtvdev = container_of( + core_vdev, struct virtiovf_pci_core_device, core_device.vdev); + loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; + size_t register_offset; + loff_t copy_offset; + size_t copy_count; + + if (range_intersect_range(pos, count, PCI_COMMAND, sizeof(virtvdev->pci_cmd), + &copy_offset, &copy_count, + &register_offset)) { + if (copy_from_user((void *)&virtvdev->pci_cmd + register_offset, + buf + copy_offset, + copy_count)) + return -EFAULT; + } + + if (range_intersect_range(pos, count, PCI_BASE_ADDRESS_0, + sizeof(virtvdev->pci_base_addr_0), + &copy_offset, &copy_count, + &register_offset)) { + if (copy_from_user((void *)&virtvdev->pci_base_addr_0 + register_offset, + buf + copy_offset, + copy_count)) + return -EFAULT; + } + + return vfio_pci_core_write(core_vdev, buf, count, ppos); +} + +static ssize_t +virtiovf_pci_core_write(struct vfio_device *core_vdev, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct virtiovf_pci_core_device *virtvdev = container_of( + core_vdev, struct virtiovf_pci_core_device, core_device.vdev); + unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos); + loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; + + if (!count) + return 0; + + if (index == VFIO_PCI_CONFIG_REGION_INDEX) + return virtiovf_pci_write_config(core_vdev, buf, count, ppos); + + if (index == VFIO_PCI_BAR0_REGION_INDEX) + return virtiovf_pci_bar0_rw(virtvdev, pos, (char __user *)buf, count, false); + + return vfio_pci_core_write(core_vdev, buf, count, ppos); +} + +static int +virtiovf_pci_ioctl_get_region_info(struct vfio_device *core_vdev, + unsigned int cmd, unsigned long arg) +{ + struct virtiovf_pci_core_device *virtvdev = container_of( + core_vdev, struct virtiovf_pci_core_device, core_device.vdev); + unsigned long minsz = offsetofend(struct vfio_region_info, offset); + void __user *uarg = (void __user *)arg; + struct
vfio_region_info info = {}; + + if (copy_from_user(&info, uarg, minsz)) + return -EFAULT; + + if (info.argsz < minsz) + return -EINVAL; + + switch (info.index) { + case VFIO_PCI_BAR0_REGION_INDEX: + info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); + info.size = virtvdev->bar0_virtual_buf_size; + info.flags = VFIO_REGION_INFO_FLAG_READ | + VFIO_REGION_INFO_FLAG_WRITE; + return copy_to_user(uarg, &info, minsz) ? -EFAULT : 0; + default: + return vfio_pci_core_ioctl(core_vdev, cmd, arg); + } +} + +static long +virtiovf_vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd, + unsigned long arg) +{ + switch (cmd) { + case VFIO_DEVICE_GET_REGION_INFO: + return virtiovf_pci_ioctl_get_region_info(core_vdev, cmd, arg); + default: + return vfio_pci_core_ioctl(core_vdev, cmd, arg); + } +} + +static int +virtiovf_set_notify_addr(struct virtiovf_pci_core_device *virtvdev) +{ + struct vfio_pci_core_device *core_device = &virtvdev->core_device; + int ret; + + /* + * Setup the BAR where the 'notify' exists to be used by vfio as well + * This will let us mmap it only once and use it when needed. + */ + ret = vfio_pci_core_setup_barmap(core_device, + virtvdev->notify_bar); + if (ret) + return ret; + + virtvdev->notify_addr = core_device->barmap[virtvdev->notify_bar] + + virtvdev->notify_offset; + return 0; +} + +static int virtiovf_pci_open_device(struct vfio_device *core_vdev) +{ + struct virtiovf_pci_core_device *virtvdev = container_of( + core_vdev, struct virtiovf_pci_core_device, core_device.vdev); + struct vfio_pci_core_device *vdev = &virtvdev->core_device; + int ret; + + ret = vfio_pci_core_enable(vdev); + if (ret) + return ret; + + if (virtvdev->bar0_virtual_buf) { + /* + * Upon close_device() the vfio_pci_core_disable() is called + * and will close all the previous mmaps, so it seems that the + * valid life cycle for the 'notify' addr is per open/close. 
+ */ + ret = virtiovf_set_notify_addr(virtvdev); + if (ret) { + vfio_pci_core_disable(vdev); + return ret; + } + } + + vfio_pci_core_finish_enable(vdev); + return 0; +} + +static int virtiovf_get_device_config_size(unsigned short device) +{ + /* Network card */ + return offsetofend(struct virtio_net_config, status); +} + +static int virtiovf_read_notify_info(struct virtiovf_pci_core_device *virtvdev) +{ + u64 offset; + int ret; + u8 bar; + + ret = virtio_pci_admin_legacy_io_notify_info(virtvdev->core_device.pdev, + VIRTIO_ADMIN_CMD_NOTIFY_INFO_FLAGS_OWNER_MEM, + &bar, &offset); + if (ret) + return ret; + + virtvdev->notify_bar = bar; + virtvdev->notify_offset = offset; + return 0; +} + +static int virtiovf_pci_init_device(struct vfio_device *core_vdev) +{ + struct virtiovf_pci_core_device *virtvdev = container_of( + core_vdev, struct virtiovf_pci_core_device, core_device.vdev); + struct pci_dev *pdev; + int ret; + + ret = vfio_pci_core_init_dev(core_vdev); + if (ret) + return ret; + + pdev = virtvdev->core_device.pdev; + ret = virtiovf_read_notify_info(virtvdev); + if (ret) + return ret; + + virtvdev->bar0_virtual_buf_size = VIRTIO_PCI_CONFIG_OFF(true) + + virtiovf_get_device_config_size(pdev->device); + BUILD_BUG_ON(!is_power_of_2(virtvdev->bar0_virtual_buf_size)); + virtvdev->bar0_virtual_buf = kzalloc(virtvdev->bar0_virtual_buf_size, + GFP_KERNEL); + if (!virtvdev->bar0_virtual_buf) + return -ENOMEM; + mutex_init(&virtvdev->bar_mutex); + return 0; +} + +static void virtiovf_pci_core_release_dev(struct vfio_device *core_vdev) +{ + struct virtiovf_pci_core_device *virtvdev = container_of( + core_vdev, struct virtiovf_pci_core_device, core_device.vdev); + + kfree(virtvdev->bar0_virtual_buf); + vfio_pci_core_release_dev(core_vdev); +} + +static const struct vfio_device_ops virtiovf_vfio_pci_tran_ops = { + .name = "virtio-vfio-pci-trans", + .init = virtiovf_pci_init_device, + .release = virtiovf_pci_core_release_dev, + .open_device = virtiovf_pci_open_device, + 
.close_device = vfio_pci_core_close_device, + .ioctl = virtiovf_vfio_pci_core_ioctl, + .device_feature = vfio_pci_core_ioctl_feature, + .read = virtiovf_pci_core_read, + .write = virtiovf_pci_core_write, + .mmap = vfio_pci_core_mmap, + .request = vfio_pci_core_request, + .match = vfio_pci_core_match, + .bind_iommufd = vfio_iommufd_physical_bind, + .unbind_iommufd = vfio_iommufd_physical_unbind, + .attach_ioas = vfio_iommufd_physical_attach_ioas, + .detach_ioas = vfio_iommufd_physical_detach_ioas, +}; + +static const struct vfio_device_ops virtiovf_vfio_pci_ops = { + .name = "virtio-vfio-pci", + .init = vfio_pci_core_init_dev, + .release = vfio_pci_core_release_dev, + .open_device = virtiovf_pci_open_device, + .close_device = vfio_pci_core_close_device, + .ioctl = vfio_pci_core_ioctl, + .device_feature = vfio_pci_core_ioctl_feature, + .read = vfio_pci_core_read, + .write = vfio_pci_core_write, + .mmap = vfio_pci_core_mmap, + .request = vfio_pci_core_request, + .match = vfio_pci_core_match, + .bind_iommufd = vfio_iommufd_physical_bind, + .unbind_iommufd = vfio_iommufd_physical_unbind, + .attach_ioas = vfio_iommufd_physical_attach_ioas, + .detach_ioas = vfio_iommufd_physical_detach_ioas, +}; + +static bool virtiovf_bar0_exists(struct pci_dev *pdev) +{ + struct resource *res = pdev->resource; + + return res->flags; +} + +static int virtiovf_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + const struct vfio_device_ops *ops = &virtiovf_vfio_pci_ops; + struct virtiovf_pci_core_device *virtvdev; + int ret; + + if (pdev->is_virtfn && virtio_pci_admin_has_legacy_io(pdev) && + !virtiovf_bar0_exists(pdev)) + ops = &virtiovf_vfio_pci_tran_ops; + + virtvdev = vfio_alloc_device(virtiovf_pci_core_device, core_device.vdev, + &pdev->dev, ops); + if (IS_ERR(virtvdev)) + return PTR_ERR(virtvdev); + + dev_set_drvdata(&pdev->dev, &virtvdev->core_device); + ret = vfio_pci_core_register_device(&virtvdev->core_device); + if (ret) + goto out; + return 0; +out: + 
vfio_put_device(&virtvdev->core_device.vdev); + return ret; +} + +static void virtiovf_pci_remove(struct pci_dev *pdev) +{ + struct virtiovf_pci_core_device *virtvdev = dev_get_drvdata(&pdev->dev); + + vfio_pci_core_unregister_device(&virtvdev->core_device); + vfio_put_device(&virtvdev->core_device.vdev); +} + +static const struct pci_device_id virtiovf_pci_table[] = { + /* Only virtio-net is supported/tested so far */ + { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_REDHAT_QUMRANET, 0x1041) }, + {} +}; + +MODULE_DEVICE_TABLE(pci, virtiovf_pci_table); + +static void virtiovf_pci_aer_reset_done(struct pci_dev *pdev) +{ + struct virtiovf_pci_core_device *virtvdev = dev_get_drvdata(&pdev->dev); + + virtvdev->pci_cmd = 0; +} + +static const struct pci_error_handlers virtiovf_err_handlers = { + .reset_done = virtiovf_pci_aer_reset_done, + .error_detected = vfio_pci_core_aer_err_detected, +}; + +static struct pci_driver virtiovf_pci_driver = { + .name = KBUILD_MODNAME, + .id_table = virtiovf_pci_table, + .probe = virtiovf_pci_probe, + .remove = virtiovf_pci_remove, + .err_handler = &virtiovf_err_handlers, + .driver_managed_dma = true, +}; + +module_pci_driver(virtiovf_pci_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Yishai Hadas "); +MODULE_DESCRIPTION( + "VIRTIO VFIO PCI - User Level meta-driver for VIRTIO NET devices"); diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h index 307e3f29b527f5b6178ded2705bf9baebda2e3a9..bde84ad344e50181685f5fbc2620c20b7b33f5a0 100644 --- a/drivers/vfio/vfio.h +++ b/drivers/vfio/vfio.h @@ -448,4 +448,18 @@ static inline void vfio_device_put_kvm(struct vfio_device *device) } #endif +#ifdef CONFIG_VFIO_DEBUGFS +void vfio_debugfs_create_root(void); +void vfio_debugfs_remove_root(void); + +void vfio_device_debugfs_init(struct vfio_device *vdev); +void vfio_device_debugfs_exit(struct vfio_device *vdev); +#else +static inline void vfio_debugfs_create_root(void) { } +static inline void vfio_debugfs_remove_root(void) { } + +static 
inline void vfio_device_debugfs_init(struct vfio_device *vdev) { } +static inline void vfio_device_debugfs_exit(struct vfio_device *vdev) { } +#endif /* CONFIG_VFIO_DEBUGFS */ + #endif diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index eacd6ec04de5a42d58b1a677cffb12d514556f4b..b2854d7939ce02ddf2be186e483c0f8c3f094a26 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -1436,7 +1436,7 @@ static int vfio_iommu_map(struct vfio_iommu *iommu, dma_addr_t iova, list_for_each_entry(d, &iommu->domain_list, next) { ret = iommu_map(d->domain, iova, (phys_addr_t)pfn << PAGE_SHIFT, npage << PAGE_SHIFT, prot | IOMMU_CACHE, - GFP_KERNEL); + GFP_KERNEL_ACCOUNT); if (ret) goto unwind; @@ -1750,7 +1750,8 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, } ret = iommu_map(domain->domain, iova, phys, size, - dma->prot | IOMMU_CACHE, GFP_KERNEL); + dma->prot | IOMMU_CACHE, + GFP_KERNEL_ACCOUNT); if (ret) { if (!dma->iommu_mapped) { vfio_unpin_pages_remote(dma, iova, @@ -1845,7 +1846,8 @@ static void vfio_test_domain_fgsp(struct vfio_domain *domain, struct list_head * continue; ret = iommu_map(domain->domain, start, page_to_phys(pages), PAGE_SIZE * 2, - IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE, GFP_KERNEL); + IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE, + GFP_KERNEL_ACCOUNT); if (!ret) { size_t unmapped = iommu_unmap(domain->domain, start, PAGE_SIZE); diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index 8d4995ada74a01848ce8e7becf61120cc10ec33a..1cc93aac99a290d903819635284860b48600ab5d 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -311,6 +311,7 @@ static int __vfio_register_dev(struct vfio_device *device, refcount_set(&device->refcount, 1); vfio_device_group_register(device); + vfio_device_debugfs_init(device); return 0; err_out: @@ -378,6 +379,7 @@ void vfio_unregister_group_dev(struct vfio_device *device) } } + vfio_device_debugfs_exit(device); /* Balances 
vfio_device_set_group in register path */ vfio_device_remove_group(device); } @@ -1676,6 +1678,7 @@ static int __init vfio_init(void) if (ret) goto err_alloc_dev_chrdev; + vfio_debugfs_create_root(); pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n"); return 0; @@ -1691,6 +1694,7 @@ err_virqfd: static void __exit vfio_cleanup(void) { + vfio_debugfs_remove_root(); ida_destroy(&vfio.device_ida); vfio_cdev_cleanup(); class_destroy(vfio.device_class); diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 173beda74b38c09446da95fe79c64ae6440cf2e0..bc4a51e4638b46c79345f2be89dc7639b81ee28a 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -59,6 +59,7 @@ struct vhost_vdpa { int in_batch; struct vdpa_iova_range range; u32 batch_asid; + bool suspended; }; static DEFINE_IDA(vhost_vdpa_ida); @@ -232,6 +233,8 @@ static int _compat_vdpa_reset(struct vhost_vdpa *v) struct vdpa_device *vdpa = v->vdpa; u32 flags = 0; + v->suspended = false; + if (v->vdev.vqs) { flags |= !vhost_backend_has_feature(v->vdev.vqs[0], VHOST_BACKEND_F_IOTLB_PERSIST) ? 
@@ -590,11 +593,16 @@ static long vhost_vdpa_suspend(struct vhost_vdpa *v) { struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; + int ret; if (!ops->suspend) return -EOPNOTSUPP; - return ops->suspend(vdpa); + ret = ops->suspend(vdpa); + if (!ret) + v->suspended = true; + + return ret; } /* After a successful return of this ioctl the device resumes processing @@ -605,11 +613,16 @@ static long vhost_vdpa_resume(struct vhost_vdpa *v) { struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; + int ret; if (!ops->resume) return -EOPNOTSUPP; - return ops->resume(vdpa); + ret = ops->resume(vdpa); + if (!ret) + v->suspended = false; + + return ret; } static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, @@ -690,6 +703,9 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, switch (cmd) { case VHOST_SET_VRING_ADDR: + if ((ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK) && !v->suspended) + return -EINVAL; + if (ops->set_vq_address(vdpa, idx, (u64)(uintptr_t)vq->desc, (u64)(uintptr_t)vq->avail, @@ -698,6 +714,9 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, break; case VHOST_SET_VRING_BASE: + if ((ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK) && !v->suspended) + return -EINVAL; + if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) { vq_state.packed.last_avail_idx = vq->last_avail_idx & 0x7fff; vq_state.packed.last_avail_counter = !!(vq->last_avail_idx & 0x8000); @@ -968,7 +987,8 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb, r = ops->set_map(vdpa, asid, iotlb); } else { r = iommu_map(v->domain, iova, pa, size, - perm_to_iommu_flags(perm), GFP_KERNEL); + perm_to_iommu_flags(perm), + GFP_KERNEL_ACCOUNT); } if (r) { vhost_iotlb_del_range(iotlb, iova, iova + size - 1); diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig index 
94d092091b5e76cdfe2d7696d1edf0a2de2c4b3f..ea2d0d69bd8cc11f6393bea249b9744d57fb86a7 100644 --- a/drivers/video/backlight/Kconfig +++ b/drivers/video/backlight/Kconfig @@ -395,6 +395,17 @@ config BACKLIGHT_LP8788 help This supports TI LP8788 backlight driver. +config BACKLIGHT_MP3309C + tristate "Backlight Driver for MPS MP3309C" + depends on I2C && PWM + select REGMAP_I2C + help + This supports MPS MP3309C backlight WLED driver in both PWM and + analog/I2C dimming modes. + + To compile this driver as a module, choose M here: the module will + be called mp3309c. + config BACKLIGHT_PANDORA tristate "Backlight driver for Pandora console" depends on TWL4030_CORE diff --git a/drivers/video/backlight/Makefile b/drivers/video/backlight/Makefile index 67d3ff39be3c1e4d4969c2ea5eff199504c585a0..06966cb204597b74afffe16ba4814357843b2d86 100644 --- a/drivers/video/backlight/Makefile +++ b/drivers/video/backlight/Makefile @@ -43,6 +43,7 @@ obj-$(CONFIG_BACKLIGHT_LP855X) += lp855x_bl.o obj-$(CONFIG_BACKLIGHT_LP8788) += lp8788_bl.o obj-$(CONFIG_BACKLIGHT_LV5207LP) += lv5207lp.o obj-$(CONFIG_BACKLIGHT_MAX8925) += max8925_bl.o +obj-$(CONFIG_BACKLIGHT_MP3309C) += mp3309c.o obj-$(CONFIG_BACKLIGHT_MT6370) += mt6370-backlight.o obj-$(CONFIG_BACKLIGHT_OMAP1) += omap1_bl.o obj-$(CONFIG_BACKLIGHT_PANDORA) += pandora_bl.o diff --git a/drivers/video/backlight/hx8357.c b/drivers/video/backlight/hx8357.c index f76d2469d490103dfd7090310c60a73f7736207b..d7298376cf74ddd89c4804ab297b621538ff15e2 100644 --- a/drivers/video/backlight/hx8357.c +++ b/drivers/video/backlight/hx8357.c @@ -6,11 +6,11 @@ */ #include +#include #include #include #include #include -#include #include #define HX8357_NUM_IM_PINS 3 @@ -83,11 +83,10 @@ #define HX8369_SET_GAMMA_CURVE_RELATED 0xe0 struct hx8357_data { - unsigned im_pins[HX8357_NUM_IM_PINS]; - unsigned reset; + struct gpio_descs *im_pins; + struct gpio_desc *reset; struct spi_device *spi; int state; - bool use_im_pins; }; static u8 hx8357_seq_power[] = { @@ -321,11 
+320,11 @@ static void hx8357_lcd_reset(struct lcd_device *lcdev) struct hx8357_data *lcd = lcd_get_data(lcdev); /* Reset the screen */ - gpio_set_value(lcd->reset, 1); + gpiod_set_value(lcd->reset, 0); usleep_range(10000, 12000); - gpio_set_value(lcd->reset, 0); + gpiod_set_value(lcd->reset, 1); usleep_range(10000, 12000); - gpio_set_value(lcd->reset, 1); + gpiod_set_value(lcd->reset, 0); /* The controller needs 120ms to recover from reset */ msleep(120); @@ -340,10 +339,10 @@ static int hx8357_lcd_init(struct lcd_device *lcdev) * Set the interface selection pins to SPI mode, with three * wires */ - if (lcd->use_im_pins) { - gpio_set_value_cansleep(lcd->im_pins[0], 1); - gpio_set_value_cansleep(lcd->im_pins[1], 0); - gpio_set_value_cansleep(lcd->im_pins[2], 1); + if (lcd->im_pins) { + gpiod_set_value_cansleep(lcd->im_pins->desc[0], 1); + gpiod_set_value_cansleep(lcd->im_pins->desc[1], 0); + gpiod_set_value_cansleep(lcd->im_pins->desc[2], 1); } ret = hx8357_spi_write_array(lcdev, hx8357_seq_power, @@ -580,6 +579,7 @@ MODULE_DEVICE_TABLE(of, hx8357_dt_ids); static int hx8357_probe(struct spi_device *spi) { + struct device *dev = &spi->dev; struct lcd_device *lcdev; struct hx8357_data *lcd; const struct of_device_id *match; @@ -601,49 +601,19 @@ static int hx8357_probe(struct spi_device *spi) if (!match || !match->data) return -EINVAL; - lcd->reset = of_get_named_gpio(spi->dev.of_node, "gpios-reset", 0); - if (!gpio_is_valid(lcd->reset)) { - dev_err(&spi->dev, "Missing dt property: gpios-reset\n"); - return -EINVAL; - } + lcd->reset = devm_gpiod_get(dev, "reset", GPIOD_OUT_LOW); + if (IS_ERR(lcd->reset)) + return dev_err_probe(dev, PTR_ERR(lcd->reset), "failed to request reset GPIO\n"); + gpiod_set_consumer_name(lcd->reset, "hx8357-reset"); - ret = devm_gpio_request_one(&spi->dev, lcd->reset, - GPIOF_OUT_INIT_HIGH, - "hx8357-reset"); - if (ret) { - dev_err(&spi->dev, - "failed to request gpio %d: %d\n", - lcd->reset, ret); - return -EINVAL; - } + lcd->im_pins = 
devm_gpiod_get_array_optional(dev, "im", GPIOD_OUT_LOW); + if (IS_ERR(lcd->im_pins)) + return dev_err_probe(dev, PTR_ERR(lcd->im_pins), "failed to request im GPIOs\n"); + if (lcd->im_pins && lcd->im_pins->ndescs < HX8357_NUM_IM_PINS) /* "im" GPIOs are optional: im_pins is NULL when none are described */ + return dev_err_probe(dev, -EINVAL, "not enough im GPIOs\n"); - if (of_property_present(spi->dev.of_node, "im-gpios")) { - lcd->use_im_pins = 1; - - for (i = 0; i < HX8357_NUM_IM_PINS; i++) { - lcd->im_pins[i] = of_get_named_gpio(spi->dev.of_node, - "im-gpios", i); - if (lcd->im_pins[i] == -EPROBE_DEFER) { - dev_info(&spi->dev, "GPIO requested is not here yet, deferring the probe\n"); - return -EPROBE_DEFER; - } - if (!gpio_is_valid(lcd->im_pins[i])) { - dev_err(&spi->dev, "Missing dt property: im-gpios\n"); - return -EINVAL; - } - - ret = devm_gpio_request_one(&spi->dev, lcd->im_pins[i], - GPIOF_OUT_INIT_LOW, - "im_pins"); - if (ret) { - dev_err(&spi->dev, "failed to request gpio %d: %d\n", - lcd->im_pins[i], ret); - return -EINVAL; - } - } - } else { - lcd->use_im_pins = 0; - } + for (i = 0; lcd->im_pins && i < HX8357_NUM_IM_PINS; i++) /* skip naming when the optional array is absent */ + gpiod_set_consumer_name(lcd->im_pins->desc[i], "im_pins"); lcdev = devm_lcd_device_register(&spi->dev, "mxsfb", &spi->dev, lcd, &hx8357_ops); diff --git a/drivers/video/backlight/ili922x.c b/drivers/video/backlight/ili922x.c index e7b6bd827986fa931de3d3e6c22565e8227afd35..c8e0e655dc867d49ac6729abecea39e9590a8792 100644 --- a/drivers/video/backlight/ili922x.c +++ b/drivers/video/backlight/ili922x.c @@ -81,7 +81,7 @@ #define START_RW_WRITE 0 #define START_RW_READ 1 -/** +/* * START_BYTE(id, rs, rw) * * Set the start byte according to the required operation. @@ -100,7 +100,7 @@ #define START_BYTE(id, rs, rw) \ (0x70 | (((id) & 0x01) << 2) | (((rs) & 0x01) << 1) | ((rw) & 0x01)) -/** +/* * CHECK_FREQ_REG(spi_device s, spi_transfer x) - Check the frequency * for the SPI transfer. 
According to the datasheet, the controller * accept higher frequency for the GRAM transfer, but it requires @@ -269,6 +269,10 @@ static int ili922x_write(struct spi_device *spi, u8 reg, u16 value) spi_message_add_tail(&xfer_regindex, &msg); ret = spi_sync(spi, &msg); + if (ret < 0) { + dev_err(&spi->dev, "Error sending SPI message 0x%x", ret); + return ret; + } spi_message_init(&msg); tbuf[0] = set_tx_byte(START_BYTE(ili922x_id, START_RS_REG, diff --git a/drivers/video/backlight/mp3309c.c b/drivers/video/backlight/mp3309c.c new file mode 100644 index 0000000000000000000000000000000000000000..34d71259fac1d7da3222f02ef8bc716a8e54f25c --- /dev/null +++ b/drivers/video/backlight/mp3309c.c @@ -0,0 +1,444 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Driver for MPS MP3309C White LED driver with I2C interface + * + * This driver support both analog (by I2C commands) and PWM dimming control + * modes. + * + * Copyright (C) 2023 ASEM Srl + * Author: Flavio Suligoi + * + * Based on pwm_bl.c + */ + +#include +#include +#include +#include +#include +#include + +#define REG_I2C_0 0x00 +#define REG_I2C_1 0x01 + +#define REG_I2C_0_EN 0x80 +#define REG_I2C_0_D0 0x40 +#define REG_I2C_0_D1 0x20 +#define REG_I2C_0_D2 0x10 +#define REG_I2C_0_D3 0x08 +#define REG_I2C_0_D4 0x04 +#define REG_I2C_0_RSRV1 0x02 +#define REG_I2C_0_RSRV2 0x01 + +#define REG_I2C_1_RSRV1 0x80 +#define REG_I2C_1_DIMS 0x40 +#define REG_I2C_1_SYNC 0x20 +#define REG_I2C_1_OVP0 0x10 +#define REG_I2C_1_OVP1 0x08 +#define REG_I2C_1_VOS 0x04 +#define REG_I2C_1_LEDO 0x02 +#define REG_I2C_1_OTP 0x01 + +#define ANALOG_I2C_NUM_LEVELS 32 /* 0..31 */ +#define ANALOG_I2C_REG_MASK 0x7c + +#define MP3309C_PWM_DEFAULT_NUM_LEVELS 256 /* 0..255 */ + +enum mp3309c_status_value { + FIRST_POWER_ON, + BACKLIGHT_OFF, + BACKLIGHT_ON, +}; + +enum mp3309c_dimming_mode_value { + DIMMING_PWM, + DIMMING_ANALOG_I2C, +}; + +struct mp3309c_platform_data { + unsigned int max_brightness; + unsigned int default_brightness; + unsigned 
int *levels; + u8 dimming_mode; + u8 over_voltage_protection; + bool sync_mode; + u8 status; +}; + +struct mp3309c_chip { + struct device *dev; + struct mp3309c_platform_data *pdata; + struct backlight_device *bl; + struct gpio_desc *enable_gpio; + struct regmap *regmap; + struct pwm_device *pwmd; +}; + +static const struct regmap_config mp3309c_regmap = { + .name = "mp3309c_regmap", + .reg_bits = 8, + .reg_stride = 1, + .val_bits = 8, + .max_register = REG_I2C_1, +}; + +static int mp3309c_enable_device(struct mp3309c_chip *chip) +{ + u8 reg_val; + int ret; + + /* I2C register #0 - Device enable */ + ret = regmap_update_bits(chip->regmap, REG_I2C_0, REG_I2C_0_EN, + REG_I2C_0_EN); + if (ret) + return ret; + + /* + * I2C register #1 - Set working mode: + * - set one of the two dimming mode: + * - PWM dimming using an external PWM dimming signal + * - analog dimming using I2C commands + * - enable/disable synchronous mode + * - set overvoltage protection (OVP) + */ + reg_val = 0x00; + if (chip->pdata->dimming_mode == DIMMING_PWM) + reg_val |= REG_I2C_1_DIMS; + if (chip->pdata->sync_mode) + reg_val |= REG_I2C_1_SYNC; + reg_val |= chip->pdata->over_voltage_protection; + ret = regmap_write(chip->regmap, REG_I2C_1, reg_val); + if (ret) + return ret; + + return 0; +} + +static int mp3309c_bl_update_status(struct backlight_device *bl) +{ + struct mp3309c_chip *chip = bl_get_data(bl); + int brightness = backlight_get_brightness(bl); + struct pwm_state pwmstate; + unsigned int analog_val, bits_val; + int i, ret; + + if (chip->pdata->dimming_mode == DIMMING_PWM) { + /* + * PWM control mode + */ + pwm_get_state(chip->pwmd, &pwmstate); + pwm_set_relative_duty_cycle(&pwmstate, + chip->pdata->levels[brightness], + chip->pdata->levels[chip->pdata->max_brightness]); + pwmstate.enabled = true; + ret = pwm_apply_state(chip->pwmd, &pwmstate); + if (ret) + return ret; + + switch (chip->pdata->status) { + case FIRST_POWER_ON: + case BACKLIGHT_OFF: + /* + * After 20ms of low pwm signal 
level, the chip turns + * off automatically. In this case, before enabling the + * chip again, we must wait about 10ms for pwm signal to + * stabilize. + */ + if (brightness > 0) { + msleep(10); + mp3309c_enable_device(chip); + chip->pdata->status = BACKLIGHT_ON; + } else { + chip->pdata->status = BACKLIGHT_OFF; + } + break; + case BACKLIGHT_ON: + if (brightness == 0) + chip->pdata->status = BACKLIGHT_OFF; + break; + } + } else { + /* + * Analog (by I2C command) control mode + * + * The first time, before setting brightness, we must enable the + * device + */ + if (chip->pdata->status == FIRST_POWER_ON) + mp3309c_enable_device(chip); + + /* + * Dimming mode I2C command (fixed dimming range 0..31) + * + * The 5 bits of the dimming analog value D4..D0 is allocated + * in the I2C register #0, in the following way: + * + * +--+--+--+--+--+--+--+--+ + * |EN|D0|D1|D2|D3|D4|XX|XX| + * +--+--+--+--+--+--+--+--+ + */ + analog_val = brightness; + bits_val = 0; + for (i = 0; i < 5; i++) /* D0..D4 only: five dimming bits, mirrored into register bits 6..2 */ + bits_val += ((analog_val >> i) & 0x01) << (6 - i); + ret = regmap_update_bits(chip->regmap, REG_I2C_0, + ANALOG_I2C_REG_MASK, bits_val); + if (ret) + return ret; + + if (brightness > 0) + chip->pdata->status = BACKLIGHT_ON; + else + chip->pdata->status = BACKLIGHT_OFF; + } + + return 0; +} + +static const struct backlight_ops mp3309c_bl_ops = { + .update_status = mp3309c_bl_update_status, +}; + +static int pm3309c_parse_dt_node(struct mp3309c_chip *chip, + struct mp3309c_platform_data *pdata) +{ + struct device_node *node = chip->dev->of_node; + struct property *prop_pwms; + struct property *prop_levels = NULL; + int length = 0; + int ret, i; + unsigned int num_levels, tmp_value; + + if (!node) { + dev_err(chip->dev, "failed to get DT node\n"); + return -ENODEV; + } + + /* + * Dimming mode: the MP3309C provides two dimming control mode: + * + * - PWM mode + * - Analog by I2C control mode (default) + * + * I2C control mode is assumed as default but, if the pwms property is + * found in the 
backlight node, the mode switches to PWM mode. + */ + pdata->dimming_mode = DIMMING_ANALOG_I2C; + prop_pwms = of_find_property(node, "pwms", &length); + if (prop_pwms) { + chip->pwmd = devm_pwm_get(chip->dev, NULL); + if (IS_ERR(chip->pwmd)) + return dev_err_probe(chip->dev, PTR_ERR(chip->pwmd), + "error getting pwm data\n"); + pdata->dimming_mode = DIMMING_PWM; + pwm_apply_args(chip->pwmd); + } + + /* + * In I2C control mode the dimming levels (0..31) are fixed by the + * hardware, while in PWM control mode they can be chosen by the user, + * to allow nonlinear mappings. + */ + if (pdata->dimming_mode == DIMMING_ANALOG_I2C) { + /* + * Analog (by I2C commands) control mode: fixed 0..31 brightness + * levels + */ + num_levels = ANALOG_I2C_NUM_LEVELS; + + /* Enable GPIO used in I2C dimming mode only */ + chip->enable_gpio = devm_gpiod_get(chip->dev, "enable", + GPIOD_OUT_HIGH); + if (IS_ERR(chip->enable_gpio)) + return dev_err_probe(chip->dev, + PTR_ERR(chip->enable_gpio), + "error getting enable gpio\n"); + } else { + /* + * PWM control mode: check for brightness level in DT + */ + prop_levels = of_find_property(node, "brightness-levels", + &length); + if (prop_levels) { + /* Read brightness levels from DT */ + num_levels = length / sizeof(u32); + if (num_levels < 2) + return -EINVAL; + } else { + /* Use default brightness levels */ + num_levels = MP3309C_PWM_DEFAULT_NUM_LEVELS; + } + } + + /* Fill brightness levels array */ + pdata->levels = devm_kcalloc(chip->dev, num_levels, + sizeof(*pdata->levels), GFP_KERNEL); + if (!pdata->levels) + return -ENOMEM; + if (prop_levels) { + ret = of_property_read_u32_array(node, "brightness-levels", + pdata->levels, + num_levels); + if (ret < 0) + return ret; + } else { + for (i = 0; i < num_levels; i++) + pdata->levels[i] = i; + } + + pdata->max_brightness = num_levels - 1; + + ret = of_property_read_u32(node, "default-brightness", + &pdata->default_brightness); + if (ret) + pdata->default_brightness = pdata->max_brightness; + 
if (pdata->default_brightness > pdata->max_brightness) { + dev_err(chip->dev, + "default brightness exceeds max brightness\n"); + pdata->default_brightness = pdata->max_brightness; + } + + /* + * Over-voltage protection (OVP) + * + * This (optional) property values are: + * + * - 13.5V + * - 24V + * - 35.5V (hardware default setting) + * + * If missing, the default value for OVP is 35.5V + */ + pdata->over_voltage_protection = REG_I2C_1_OVP1; + if (!of_property_read_u32(node, "mps,overvoltage-protection-microvolt", + &tmp_value)) { + switch (tmp_value) { + case 13500000: + pdata->over_voltage_protection = 0x00; + break; + case 24000000: + pdata->over_voltage_protection = REG_I2C_1_OVP0; + break; + case 35500000: + pdata->over_voltage_protection = REG_I2C_1_OVP1; + break; + default: + return -EINVAL; + } + } + + /* Synchronous (default) and non-synchronous mode */ + pdata->sync_mode = true; + if (of_property_read_bool(node, "mps,no-sync-mode")) + pdata->sync_mode = false; + + return 0; +} + +static int mp3309c_probe(struct i2c_client *client) +{ + struct mp3309c_platform_data *pdata = dev_get_platdata(&client->dev); + struct mp3309c_chip *chip; + struct backlight_properties props = { }; /* zero-filled so members not assigned in probe are never passed uninitialised */ + struct pwm_state pwmstate; + int ret; + + if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) { + dev_err(&client->dev, "failed to check i2c functionality\n"); + return -EOPNOTSUPP; + } + + chip = devm_kzalloc(&client->dev, sizeof(*chip), GFP_KERNEL); + if (!chip) + return -ENOMEM; + + chip->dev = &client->dev; + + chip->regmap = devm_regmap_init_i2c(client, &mp3309c_regmap); + if (IS_ERR(chip->regmap)) + return dev_err_probe(&client->dev, PTR_ERR(chip->regmap), + "failed to allocate register map\n"); + + i2c_set_clientdata(client, chip); + + if (!pdata) { + pdata = devm_kzalloc(chip->dev, sizeof(*pdata), GFP_KERNEL); + if (!pdata) + return -ENOMEM; + + ret = pm3309c_parse_dt_node(chip, pdata); + if (ret) + return ret; + } + chip->pdata = pdata; + + /* Backlight properties */ + 
props.brightness = pdata->default_brightness; + props.max_brightness = pdata->max_brightness; + props.scale = BACKLIGHT_SCALE_LINEAR; + props.type = BACKLIGHT_RAW; + props.power = FB_BLANK_UNBLANK; + props.fb_blank = FB_BLANK_UNBLANK; + chip->bl = devm_backlight_device_register(chip->dev, "mp3309c", + chip->dev, chip, + &mp3309c_bl_ops, &props); + if (IS_ERR(chip->bl)) + return dev_err_probe(chip->dev, PTR_ERR(chip->bl), + "error registering backlight device\n"); + + /* In PWM dimming mode, enable pwm device */ + if (chip->pdata->dimming_mode == DIMMING_PWM) { + pwm_init_state(chip->pwmd, &pwmstate); + pwm_set_relative_duty_cycle(&pwmstate, + chip->pdata->default_brightness, + chip->pdata->max_brightness); + pwmstate.enabled = true; + ret = pwm_apply_state(chip->pwmd, &pwmstate); + if (ret) + return dev_err_probe(chip->dev, ret, + "error setting pwm device\n"); + } + + chip->pdata->status = FIRST_POWER_ON; + backlight_update_status(chip->bl); + + return 0; +} + +static void mp3309c_remove(struct i2c_client *client) +{ + struct mp3309c_chip *chip = i2c_get_clientdata(client); + struct backlight_device *bl = chip->bl; + + bl->props.power = FB_BLANK_POWERDOWN; + bl->props.brightness = 0; + backlight_update_status(chip->bl); +} + +static const struct of_device_id mp3309c_match_table[] = { + { .compatible = "mps,mp3309c", }, + { }, +}; +MODULE_DEVICE_TABLE(of, mp3309c_match_table); + +static const struct i2c_device_id mp3309c_id[] = { + { "mp3309c", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, mp3309c_id); + +static struct i2c_driver mp3309c_i2c_driver = { + .driver = { + .name = KBUILD_MODNAME, + .of_match_table = mp3309c_match_table, + }, + .probe = mp3309c_probe, + .remove = mp3309c_remove, + .id_table = mp3309c_id, +}; + +module_i2c_driver(mp3309c_i2c_driver); + +MODULE_DESCRIPTION("Backlight Driver for MPS MP3309C"); +MODULE_AUTHOR("Flavio Suligoi "); +MODULE_LICENSE("GPL"); diff --git a/drivers/video/backlight/pwm_bl.c b/drivers/video/backlight/pwm_bl.c index 
35c716e9043c3d5ee6e1466010b7c82ead0c38ad..ffcebf6aa76a967a05716a1db8f316295a0c2f66 100644 --- a/drivers/video/backlight/pwm_bl.c +++ b/drivers/video/backlight/pwm_bl.c @@ -461,10 +461,9 @@ static int pwm_backlight_probe(struct platform_device *pdev) if (!data) { ret = pwm_backlight_parse_dt(&pdev->dev, &defdata); - if (ret < 0) { - dev_err(&pdev->dev, "failed to find platform data\n"); - return ret; - } + if (ret < 0) + return dev_err_probe(&pdev->dev, ret, + "failed to find platform data\n"); data = &defdata; } @@ -493,24 +492,27 @@ static int pwm_backlight_probe(struct platform_device *pdev) pb->enable_gpio = devm_gpiod_get_optional(&pdev->dev, "enable", GPIOD_ASIS); if (IS_ERR(pb->enable_gpio)) { - ret = PTR_ERR(pb->enable_gpio); + ret = dev_err_probe(&pdev->dev, PTR_ERR(pb->enable_gpio), + "failed to acquire enable GPIO\n"); goto err_alloc; } pb->power_supply = devm_regulator_get_optional(&pdev->dev, "power"); if (IS_ERR(pb->power_supply)) { ret = PTR_ERR(pb->power_supply); - if (ret == -ENODEV) + if (ret == -ENODEV) { pb->power_supply = NULL; - else + } else { + dev_err_probe(&pdev->dev, ret, + "failed to acquire power regulator\n"); goto err_alloc; + } } pb->pwm = devm_pwm_get(&pdev->dev, NULL); if (IS_ERR(pb->pwm)) { - ret = PTR_ERR(pb->pwm); - if (ret != -EPROBE_DEFER) - dev_err(&pdev->dev, "unable to request PWM\n"); + ret = dev_err_probe(&pdev->dev, PTR_ERR(pb->pwm), + "unable to request PWM\n"); goto err_alloc; } @@ -530,8 +532,8 @@ static int pwm_backlight_probe(struct platform_device *pdev) ret = pwm_apply_might_sleep(pb->pwm, &state); if (ret) { - dev_err(&pdev->dev, "failed to apply initial PWM state: %d\n", - ret); + dev_err_probe(&pdev->dev, ret, + "failed to apply initial PWM state\n"); goto err_alloc; } @@ -568,8 +570,8 @@ static int pwm_backlight_probe(struct platform_device *pdev) ret = pwm_backlight_brightness_default(&pdev->dev, data, state.period); if (ret < 0) { - dev_err(&pdev->dev, - "failed to setup default brightness table\n"); + 
dev_err_probe(&pdev->dev, ret, + "failed to setup default brightness table\n"); goto err_alloc; } @@ -597,8 +599,8 @@ static int pwm_backlight_probe(struct platform_device *pdev) bl = backlight_device_register(dev_name(&pdev->dev), &pdev->dev, pb, &pwm_backlight_ops, &props); if (IS_ERR(bl)) { - dev_err(&pdev->dev, "failed to register backlight\n"); - ret = PTR_ERR(bl); + ret = dev_err_probe(&pdev->dev, PTR_ERR(bl), + "failed to register backlight\n"); goto err_alloc; } diff --git a/drivers/video/fbdev/core/fb_ddc.c b/drivers/video/fbdev/core/fb_ddc.c index 8bf5f2f54be7b8760fd89b027fd3d686c00ada14..e2514321986297bda06f3c29ce7bfae3cc6fe2ef 100644 --- a/drivers/video/fbdev/core/fb_ddc.c +++ b/drivers/video/fbdev/core/fb_ddc.c @@ -116,7 +116,6 @@ unsigned char *fb_ddc_read(struct i2c_adapter *adapter) algo_data->setsda(algo_data->data, 1); algo_data->setscl(algo_data->data, 1); - adapter->class |= I2C_CLASS_DDC; return edid; } diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 63af6ab034b5f1bb45992a4074f8862d528b38d3..1183e7a871f8b270a9ff2106cef15e44720184a4 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -631,8 +631,7 @@ static void fbcon_prepare_logo(struct vc_data *vc, struct fb_info *info, if (logo_lines > vc->vc_bottom) { logo_shown = FBCON_LOGO_CANSHOW; - printk(KERN_INFO - "fbcon_init: disable boot-logo (boot-logo bigger than screen).\n"); + pr_info("fbcon: disable boot-logo (boot-logo bigger than screen).\n"); } else { logo_shown = FBCON_LOGO_DRAW; vc->vc_top = logo_lines; diff --git a/drivers/video/fbdev/cyber2000fb.c b/drivers/video/fbdev/cyber2000fb.c index abb87d3576db0f178c86d480dc6973f86ca8e453..986760b90465fb117f502e702b3ca31c90efe13f 100644 --- a/drivers/video/fbdev/cyber2000fb.c +++ b/drivers/video/fbdev/cyber2000fb.c @@ -1227,7 +1227,6 @@ static int cyber2000fb_setup_ddc_bus(struct cfb_info *cfb) strscpy(cfb->ddc_adapter.name, cfb->fb.fix.id, sizeof(cfb->ddc_adapter.name)); 
cfb->ddc_adapter.owner = THIS_MODULE; - cfb->ddc_adapter.class = I2C_CLASS_DDC; cfb->ddc_adapter.algo_data = &cfb->ddc_algo; cfb->ddc_adapter.dev.parent = cfb->fb.device; cfb->ddc_algo.setsda = cyber2000fb_ddc_setsda; diff --git a/drivers/video/fbdev/i740fb.c b/drivers/video/fbdev/i740fb.c index 1897e65ab7031dfbf9f3e907ac245661bc88ba18..9b74dae71472c459e1d46eb0e0890ccb5debc9e0 100644 --- a/drivers/video/fbdev/i740fb.c +++ b/drivers/video/fbdev/i740fb.c @@ -163,7 +163,6 @@ static int i740fb_setup_ddc_bus(struct fb_info *info) strscpy(par->ddc_adapter.name, info->fix.id, sizeof(par->ddc_adapter.name)); par->ddc_adapter.owner = THIS_MODULE; - par->ddc_adapter.class = I2C_CLASS_DDC; par->ddc_adapter.algo_data = &par->ddc_algo; par->ddc_adapter.dev.parent = info->device; par->ddc_algo.setsda = i740fb_ddc_setsda; diff --git a/drivers/video/fbdev/matrox/i2c-matroxfb.c b/drivers/video/fbdev/matrox/i2c-matroxfb.c index e2e4705e3fe0e21b9e178e1b8ae50277480c6244..bb048e14b2cf1cb763d7547482e651769febf923 100644 --- a/drivers/video/fbdev/matrox/i2c-matroxfb.c +++ b/drivers/video/fbdev/matrox/i2c-matroxfb.c @@ -100,8 +100,7 @@ static const struct i2c_algo_bit_data matrox_i2c_algo_template = }; static int i2c_bus_reg(struct i2c_bit_adapter* b, struct matrox_fb_info* minfo, - unsigned int data, unsigned int clock, const char *name, - int class) + unsigned int data, unsigned int clock, const char *name) { int err; @@ -112,7 +111,6 @@ static int i2c_bus_reg(struct i2c_bit_adapter* b, struct matrox_fb_info* minfo, snprintf(b->adapter.name, sizeof(b->adapter.name), name, minfo->fbcon.node); i2c_set_adapdata(&b->adapter, b); - b->adapter.class = class; b->adapter.algo_data = &b->bac; b->adapter.dev.parent = &minfo->pcidev->dev; b->bac = matrox_i2c_algo_template; @@ -160,27 +158,24 @@ static void* i2c_matroxfb_probe(struct matrox_fb_info* minfo) { case MGA_2164: err = i2c_bus_reg(&m2info->ddc1, minfo, DDC1B_DATA, DDC1B_CLK, - "DDC:fb%u #0", I2C_CLASS_DDC); + "DDC:fb%u #0"); break; 
default: err = i2c_bus_reg(&m2info->ddc1, minfo, DDC1_DATA, DDC1_CLK, - "DDC:fb%u #0", I2C_CLASS_DDC); + "DDC:fb%u #0"); break; } if (err) goto fail_ddc1; if (minfo->devflags.dualhead) { - err = i2c_bus_reg(&m2info->ddc2, minfo, - DDC2_DATA, DDC2_CLK, - "DDC:fb%u #1", I2C_CLASS_DDC); + err = i2c_bus_reg(&m2info->ddc2, minfo, DDC2_DATA, DDC2_CLK, "DDC:fb%u #1"); if (err == -ENODEV) { printk(KERN_INFO "i2c-matroxfb: VGA->TV plug detected, DDC unavailable.\n"); } else if (err) printk(KERN_INFO "i2c-matroxfb: Could not register secondary output i2c bus. Continuing anyway.\n"); /* Register maven bus even on G450/G550 */ - err = i2c_bus_reg(&m2info->maven, minfo, - MAT_DATA, MAT_CLK, "MAVEN:fb%u", 0); + err = i2c_bus_reg(&m2info->maven, minfo, MAT_DATA, MAT_CLK, "MAVEN:fb%u"); if (err) printk(KERN_INFO "i2c-matroxfb: Could not register Maven i2c bus. Continuing anyway.\n"); else { diff --git a/drivers/video/fbdev/s3fb.c b/drivers/video/fbdev/s3fb.c index 589b349cb63e08983e03624c9ef7fea34da11623..07722a5ea8eff8a63e5827e4a79349f00689ee46 100644 --- a/drivers/video/fbdev/s3fb.c +++ b/drivers/video/fbdev/s3fb.c @@ -252,7 +252,6 @@ static int s3fb_setup_ddc_bus(struct fb_info *info) strscpy(par->ddc_adapter.name, info->fix.id, sizeof(par->ddc_adapter.name)); par->ddc_adapter.owner = THIS_MODULE; - par->ddc_adapter.class = I2C_CLASS_DDC; par->ddc_adapter.algo_data = &par->ddc_algo; par->ddc_adapter.dev.parent = info->device; par->ddc_algo.setsda = s3fb_ddc_setsda; diff --git a/drivers/video/fbdev/savage/savagefb_driver.c b/drivers/video/fbdev/savage/savagefb_driver.c index dddd6afcb972a5c23a5969c2ced0638ccf0b5b34..ebc9aeffdde7c54321b19499715e128d594c0e61 100644 --- a/drivers/video/fbdev/savage/savagefb_driver.c +++ b/drivers/video/fbdev/savage/savagefb_driver.c @@ -869,6 +869,9 @@ static int savagefb_check_var(struct fb_var_screeninfo *var, DBG("savagefb_check_var"); + if (!var->pixclock) + return -EINVAL; + var->transp.offset = 0; var->transp.length = 0; switch 
(var->bits_per_pixel) { diff --git a/drivers/video/fbdev/sis/sis_main.c b/drivers/video/fbdev/sis/sis_main.c index 803ccb6aa479703bc1cb88237b4c3adc594a75a2..009bf1d926448011292c182e7eee29c25930ed6d 100644 --- a/drivers/video/fbdev/sis/sis_main.c +++ b/drivers/video/fbdev/sis/sis_main.c @@ -1444,6 +1444,8 @@ sisfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) vtotal = var->upper_margin + var->lower_margin + var->vsync_len; + if (!var->pixclock) + return -EINVAL; pixclock = var->pixclock; if((var->vmode & FB_VMODE_MASK) == FB_VMODE_NONINTERLACED) { diff --git a/drivers/video/fbdev/stifb.c b/drivers/video/fbdev/stifb.c index 2de0e675fd1504da67b7110ee81152934ad2cbad..8e5bac27542d915534c3071ec5f64e89727c2c11 100644 --- a/drivers/video/fbdev/stifb.c +++ b/drivers/video/fbdev/stifb.c @@ -1158,7 +1158,7 @@ stifb_init_display(struct stifb_info *fb) } break; } - stifb_blank(0, (struct fb_info *)fb); /* 0=enable screen */ + stifb_blank(0, fb->info); /* 0=enable screen */ SETUP_FB(fb); } diff --git a/drivers/video/fbdev/tdfxfb.c b/drivers/video/fbdev/tdfxfb.c index 22aa953138b0f339f0c4f077c02276d0f86dea29..51ebe78359ec34e4aff34a29d60275131966adb3 100644 --- a/drivers/video/fbdev/tdfxfb.c +++ b/drivers/video/fbdev/tdfxfb.c @@ -1267,7 +1267,6 @@ static int tdfxfb_setup_ddc_bus(struct tdfxfb_i2c_chan *chan, const char *name, strscpy(chan->adapter.name, name, sizeof(chan->adapter.name)); chan->adapter.owner = THIS_MODULE; - chan->adapter.class = I2C_CLASS_DDC; chan->adapter.algo_data = &chan->algo; chan->adapter.dev.parent = dev; chan->algo.setsda = tdfxfb_ddc_setsda; diff --git a/drivers/video/fbdev/tridentfb.c b/drivers/video/fbdev/tridentfb.c index 816d40b6f689cb54051324803a469093b129e9bd..516cf2a187575da096f233591fb35bb6f7c23d7b 100644 --- a/drivers/video/fbdev/tridentfb.c +++ b/drivers/video/fbdev/tridentfb.c @@ -274,7 +274,6 @@ static int tridentfb_setup_ddc_bus(struct fb_info *info) strscpy(par->ddc_adapter.name, info->fix.id, 
sizeof(par->ddc_adapter.name)); par->ddc_adapter.owner = THIS_MODULE; - par->ddc_adapter.class = I2C_CLASS_DDC; par->ddc_adapter.algo_data = &par->ddc_algo; par->ddc_adapter.dev.parent = info->device; if (is_oldclock(par->chip_id)) { /* not sure if this check is OK */ diff --git a/drivers/video/fbdev/via/via_i2c.c b/drivers/video/fbdev/via/via_i2c.c index c35e530e0ec9d775668ad4924c1e21a8c8cc3c9b..582502810575970f11fd646f491f4e806f44225d 100644 --- a/drivers/video/fbdev/via/via_i2c.c +++ b/drivers/video/fbdev/via/via_i2c.c @@ -201,7 +201,6 @@ static int create_i2c_bus(struct i2c_adapter *adapter, sprintf(adapter->name, "viafb i2c io_port idx 0x%02x", adap_cfg->ioport_index); adapter->owner = THIS_MODULE; - adapter->class = I2C_CLASS_DDC; adapter->algo_data = algo; if (pdev) adapter->dev.parent = &pdev->dev; diff --git a/drivers/video/fbdev/vt8500lcdfb.c b/drivers/video/fbdev/vt8500lcdfb.c index 42c25dc851976c5fa823b89fc4f72e5826d17459..ac73937073a76f7d22df39a503ac59bda2d4a7da 100644 --- a/drivers/video/fbdev/vt8500lcdfb.c +++ b/drivers/video/fbdev/vt8500lcdfb.c @@ -374,7 +374,6 @@ static int vt8500lcd_probe(struct platform_device *pdev) irq = platform_get_irq(pdev, 0); if (irq < 0) { - dev_err(&pdev->dev, "no IRQ defined\n"); ret = -ENODEV; goto failed_free_palette; } diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig index 8f4730f11d27bb3742a5eb36bcbf1024d5f4915b..f763b00d80406864182f21add693b7e2c9e9cd0d 100644 --- a/drivers/virtio/Kconfig +++ b/drivers/virtio/Kconfig @@ -60,6 +60,11 @@ config VIRTIO_PCI If unsure, say M. 
+config VIRTIO_PCI_ADMIN_LEGACY + bool + depends on VIRTIO_PCI && (X86 || COMPILE_TEST) + default y + config VIRTIO_PCI_LEGACY bool "Support for legacy virtio draft 0.9.X and older devices" default y diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile index 8e98d24917cc053e820e8771644c874c233a528e..73ace62af44093826691b97b251e1af415b00702 100644 --- a/drivers/virtio/Makefile +++ b/drivers/virtio/Makefile @@ -7,6 +7,7 @@ obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o virtio_pci-y := virtio_pci_modern.o virtio_pci_common.o virtio_pci-$(CONFIG_VIRTIO_PCI_LEGACY) += virtio_pci_legacy.o +virtio_pci-$(CONFIG_VIRTIO_PCI_ADMIN_LEGACY) += virtio_pci_admin_legacy_io.o obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o obj-$(CONFIG_VIRTIO_INPUT) += virtio_input.o obj-$(CONFIG_VIRTIO_VDPA) += virtio_vdpa.o diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 3893dc29eb26338f036379c5664e249ad18f1df6..f4080692b3513bd67a6d946c355c2cfbd7496ecd 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -302,9 +302,15 @@ static int virtio_dev_probe(struct device *_d) if (err) goto err; + if (dev->config->create_avq) { + err = dev->config->create_avq(dev); + if (err) + goto err; + } + err = drv->probe(dev); if (err) - goto err; + goto err_probe; /* If probe didn't do it, mark device DRIVER_OK ourselves. */ if (!(dev->config->get_status(dev) & VIRTIO_CONFIG_S_DRIVER_OK)) @@ -316,6 +322,10 @@ static int virtio_dev_probe(struct device *_d) virtio_config_enable(dev); return 0; + +err_probe: + if (dev->config->destroy_avq) + dev->config->destroy_avq(dev); err: virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED); return err; @@ -331,6 +341,9 @@ static void virtio_dev_remove(struct device *_d) drv->remove(dev); + if (dev->config->destroy_avq) + dev->config->destroy_avq(dev); + /* Driver should have reset device. 
*/ WARN_ON_ONCE(dev->config->get_status(dev)); @@ -489,13 +502,20 @@ EXPORT_SYMBOL_GPL(unregister_virtio_device); int virtio_device_freeze(struct virtio_device *dev) { struct virtio_driver *drv = drv_to_virtio(dev->dev.driver); + int ret; virtio_config_disable(dev); dev->failed = dev->config->get_status(dev) & VIRTIO_CONFIG_S_FAILED; - if (drv && drv->freeze) - return drv->freeze(dev); + if (drv && drv->freeze) { + ret = drv->freeze(dev); + if (ret) + return ret; + } + + if (dev->config->destroy_avq) + dev->config->destroy_avq(dev); return 0; } @@ -532,10 +552,16 @@ int virtio_device_restore(struct virtio_device *dev) if (ret) goto err; + if (dev->config->create_avq) { + ret = dev->config->create_avq(dev); + if (ret) + goto err; + } + if (drv->restore) { ret = drv->restore(dev); if (ret) - goto err; + goto err_restore; } /* If restore didn't do it, mark device DRIVER_OK ourselves. */ @@ -546,6 +572,9 @@ int virtio_device_restore(struct virtio_device *dev) return 0; +err_restore: + if (dev->config->destroy_avq) + dev->config->destroy_avq(dev); err: virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED); return ret; diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 59cdc0292dce123613655d667e42819565d12901..1f5b3dd31fcfc9988282c9c65cee7a6d95475b9e 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -119,6 +119,11 @@ struct virtio_balloon { /* Free page reporting device */ struct virtqueue *reporting_vq; struct page_reporting_dev_info pr_dev_info; + + /* State for keeping the wakeup_source active while adjusting the balloon */ + spinlock_t adjustment_lock; + bool adjustment_signal_pending; + bool adjustment_in_progress; }; static const struct virtio_device_id id_table[] = { @@ -437,6 +442,31 @@ static void virtio_balloon_queue_free_page_work(struct virtio_balloon *vb) queue_work(vb->balloon_wq, &vb->report_free_page_work); } +static void start_update_balloon_size(struct virtio_balloon *vb) +{ + unsigned long 
flags; + + spin_lock_irqsave(&vb->adjustment_lock, flags); + vb->adjustment_signal_pending = true; + if (!vb->adjustment_in_progress) { + vb->adjustment_in_progress = true; + pm_stay_awake(vb->vdev->dev.parent); + } + spin_unlock_irqrestore(&vb->adjustment_lock, flags); + + queue_work(system_freezable_wq, &vb->update_balloon_size_work); +} + +static void end_update_balloon_size(struct virtio_balloon *vb) +{ + spin_lock_irq(&vb->adjustment_lock); + if (!vb->adjustment_signal_pending && vb->adjustment_in_progress) { + vb->adjustment_in_progress = false; + pm_relax(vb->vdev->dev.parent); + } + spin_unlock_irq(&vb->adjustment_lock); +} + static void virtballoon_changed(struct virtio_device *vdev) { struct virtio_balloon *vb = vdev->priv; @@ -444,8 +474,7 @@ static void virtballoon_changed(struct virtio_device *vdev) spin_lock_irqsave(&vb->stop_update_lock, flags); if (!vb->stop_update) { - queue_work(system_freezable_wq, - &vb->update_balloon_size_work); + start_update_balloon_size(vb); virtio_balloon_queue_free_page_work(vb); } spin_unlock_irqrestore(&vb->stop_update_lock, flags); @@ -476,19 +505,25 @@ static void update_balloon_size_func(struct work_struct *work) vb = container_of(work, struct virtio_balloon, update_balloon_size_work); - diff = towards_target(vb); - if (!diff) - return; + spin_lock_irq(&vb->adjustment_lock); + vb->adjustment_signal_pending = false; + spin_unlock_irq(&vb->adjustment_lock); - if (diff > 0) - diff -= fill_balloon(vb, diff); - else - diff += leak_balloon(vb, -diff); - update_balloon_size(vb); + diff = towards_target(vb); + + if (diff) { + if (diff > 0) + diff -= fill_balloon(vb, diff); + else + diff += leak_balloon(vb, -diff); + update_balloon_size(vb); + } if (diff) queue_work(system_freezable_wq, work); + else + end_update_balloon_size(vb); } static int init_vqs(struct virtio_balloon *vb) @@ -992,6 +1027,8 @@ static int virtballoon_probe(struct virtio_device *vdev) goto out_unregister_oom; } + spin_lock_init(&vb->adjustment_lock); + 
virtio_device_ready(vdev); if (towards_target(vb)) diff --git a/drivers/virtio/virtio_pci_admin_legacy_io.c b/drivers/virtio/virtio_pci_admin_legacy_io.c new file mode 100644 index 0000000000000000000000000000000000000000..819cfbbc67c3bd0cf8e4c6d61828d3fb8deb7193 --- /dev/null +++ b/drivers/virtio/virtio_pci_admin_legacy_io.c @@ -0,0 +1,244 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved + */ + +#include +#include "virtio_pci_common.h" + +/* + * virtio_pci_admin_has_legacy_io - Checks whether the legacy IO + * commands are supported + * @dev: VF pci_dev + * + * Returns true on success. + */ +bool virtio_pci_admin_has_legacy_io(struct pci_dev *pdev) +{ + struct virtio_device *virtio_dev = virtio_pci_vf_get_pf_dev(pdev); + struct virtio_pci_device *vp_dev; + + if (!virtio_dev) + return false; + + if (!virtio_has_feature(virtio_dev, VIRTIO_F_ADMIN_VQ)) + return false; + + vp_dev = to_vp_device(virtio_dev); + + if ((vp_dev->admin_vq.supported_cmds & VIRTIO_LEGACY_ADMIN_CMD_BITMAP) == + VIRTIO_LEGACY_ADMIN_CMD_BITMAP) + return true; + return false; +} +EXPORT_SYMBOL_GPL(virtio_pci_admin_has_legacy_io); + +static int virtio_pci_admin_legacy_io_write(struct pci_dev *pdev, u16 opcode, + u8 offset, u8 size, u8 *buf) +{ + struct virtio_device *virtio_dev = virtio_pci_vf_get_pf_dev(pdev); + struct virtio_admin_cmd_legacy_wr_data *data; + struct virtio_admin_cmd cmd = {}; + struct scatterlist data_sg; + int vf_id; + int ret; + + if (!virtio_dev) + return -ENODEV; + + vf_id = pci_iov_vf_id(pdev); + if (vf_id < 0) + return vf_id; + + data = kzalloc(sizeof(*data) + size, GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->offset = offset; + memcpy(data->registers, buf, size); + sg_init_one(&data_sg, data, sizeof(*data) + size); + cmd.opcode = cpu_to_le16(opcode); + cmd.group_type = cpu_to_le16(VIRTIO_ADMIN_GROUP_TYPE_SRIOV); + cmd.group_member_id = cpu_to_le64(vf_id + 1); + cmd.data_sg = &data_sg; + 
ret = vp_modern_admin_cmd_exec(virtio_dev, &cmd); + + kfree(data); + return ret; +} + +/* + * virtio_pci_admin_legacy_io_write_common - Write legacy common configuration + * of a member device + * @dev: VF pci_dev + * @offset: starting byte offset within the common configuration area to write to + * @size: size of the data to write + * @buf: buffer which holds the data + * + * Note: caller must serialize access for the given device. + * Returns 0 on success, or negative on failure. + */ +int virtio_pci_admin_legacy_common_io_write(struct pci_dev *pdev, u8 offset, + u8 size, u8 *buf) +{ + return virtio_pci_admin_legacy_io_write(pdev, + VIRTIO_ADMIN_CMD_LEGACY_COMMON_CFG_WRITE, + offset, size, buf); +} +EXPORT_SYMBOL_GPL(virtio_pci_admin_legacy_common_io_write); + +/* + * virtio_pci_admin_legacy_io_write_device - Write legacy device configuration + * of a member device + * @dev: VF pci_dev + * @offset: starting byte offset within the device configuration area to write to + * @size: size of the data to write + * @buf: buffer which holds the data + * + * Note: caller must serialize access for the given device. + * Returns 0 on success, or negative on failure. 
+ */ +int virtio_pci_admin_legacy_device_io_write(struct pci_dev *pdev, u8 offset, + u8 size, u8 *buf) +{ + return virtio_pci_admin_legacy_io_write(pdev, + VIRTIO_ADMIN_CMD_LEGACY_DEV_CFG_WRITE, + offset, size, buf); +} +EXPORT_SYMBOL_GPL(virtio_pci_admin_legacy_device_io_write); + +static int virtio_pci_admin_legacy_io_read(struct pci_dev *pdev, u16 opcode, + u8 offset, u8 size, u8 *buf) +{ + struct virtio_device *virtio_dev = virtio_pci_vf_get_pf_dev(pdev); + struct virtio_admin_cmd_legacy_rd_data *data; + struct scatterlist data_sg, result_sg; + struct virtio_admin_cmd cmd = {}; + int vf_id; + int ret; + + if (!virtio_dev) + return -ENODEV; + + vf_id = pci_iov_vf_id(pdev); + if (vf_id < 0) + return vf_id; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->offset = offset; + sg_init_one(&data_sg, data, sizeof(*data)); + sg_init_one(&result_sg, buf, size); + cmd.opcode = cpu_to_le16(opcode); + cmd.group_type = cpu_to_le16(VIRTIO_ADMIN_GROUP_TYPE_SRIOV); + cmd.group_member_id = cpu_to_le64(vf_id + 1); + cmd.data_sg = &data_sg; + cmd.result_sg = &result_sg; + ret = vp_modern_admin_cmd_exec(virtio_dev, &cmd); + + kfree(data); + return ret; +} + +/* + * virtio_pci_admin_legacy_device_io_read - Read legacy device configuration of + * a member device + * @dev: VF pci_dev + * @offset: starting byte offset within the device configuration area to read from + * @size: size of the data to be read + * @buf: buffer to hold the returned data + * + * Note: caller must serialize access for the given device. + * Returns 0 on success, or negative on failure. 
+ */ +int virtio_pci_admin_legacy_device_io_read(struct pci_dev *pdev, u8 offset, + u8 size, u8 *buf) +{ + return virtio_pci_admin_legacy_io_read(pdev, + VIRTIO_ADMIN_CMD_LEGACY_DEV_CFG_READ, + offset, size, buf); +} +EXPORT_SYMBOL_GPL(virtio_pci_admin_legacy_device_io_read); + +/* + * virtio_pci_admin_legacy_common_io_read - Read legacy common configuration of + * a member device + * @dev: VF pci_dev + * @offset: starting byte offset within the common configuration area to read from + * @size: size of the data to be read + * @buf: buffer to hold the returned data + * + * Note: caller must serialize access for the given device. + * Returns 0 on success, or negative on failure. + */ +int virtio_pci_admin_legacy_common_io_read(struct pci_dev *pdev, u8 offset, + u8 size, u8 *buf) +{ + return virtio_pci_admin_legacy_io_read(pdev, + VIRTIO_ADMIN_CMD_LEGACY_COMMON_CFG_READ, + offset, size, buf); +} +EXPORT_SYMBOL_GPL(virtio_pci_admin_legacy_common_io_read); + +/* + * virtio_pci_admin_legacy_io_notify_info - Read the queue notification + * information for legacy interface + * @dev: VF pci_dev + * @req_bar_flags: requested bar flags + * @bar: on output the BAR number of the owner or member device + * @bar_offset: on output the offset within bar + * + * Returns 0 on success, or negative on failure. 
+ */ +int virtio_pci_admin_legacy_io_notify_info(struct pci_dev *pdev, + u8 req_bar_flags, u8 *bar, + u64 *bar_offset) +{ + struct virtio_device *virtio_dev = virtio_pci_vf_get_pf_dev(pdev); + struct virtio_admin_cmd_notify_info_result *result; + struct virtio_admin_cmd cmd = {}; + struct scatterlist result_sg; + int vf_id; + int ret; + + if (!virtio_dev) + return -ENODEV; + + vf_id = pci_iov_vf_id(pdev); + if (vf_id < 0) + return vf_id; + + result = kzalloc(sizeof(*result), GFP_KERNEL); + if (!result) + return -ENOMEM; + + sg_init_one(&result_sg, result, sizeof(*result)); + cmd.opcode = cpu_to_le16(VIRTIO_ADMIN_CMD_LEGACY_NOTIFY_INFO); + cmd.group_type = cpu_to_le16(VIRTIO_ADMIN_GROUP_TYPE_SRIOV); + cmd.group_member_id = cpu_to_le64(vf_id + 1); + cmd.result_sg = &result_sg; + ret = vp_modern_admin_cmd_exec(virtio_dev, &cmd); + if (!ret) { + struct virtio_admin_cmd_notify_info_data *entry; + int i; + + ret = -ENOENT; + for (i = 0; i < VIRTIO_ADMIN_CMD_MAX_NOTIFY_INFO; i++) { + entry = &result->entries[i]; + if (entry->flags == VIRTIO_ADMIN_CMD_NOTIFY_INFO_FLAGS_END) + break; + if (entry->flags != req_bar_flags) + continue; + *bar = entry->bar; + *bar_offset = le64_to_cpu(entry->offset); + ret = 0; + break; + } + } + + kfree(result); + return ret; +} +EXPORT_SYMBOL_GPL(virtio_pci_admin_legacy_io_notify_info); diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 1d21d1a1b3f55a9a2410396ec8e9d3b2cfcbdb13..b655fccaf77330b08b23a57d7e2fe1d012409615 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -236,6 +236,9 @@ void vp_del_vqs(struct virtio_device *vdev) int i; list_for_each_entry_safe(vq, n, &vdev->vqs, list) { + if (vp_dev->is_avq(vdev, vq->index)) + continue; + if (vp_dev->per_vq_vectors) { int v = vp_dev->vqs[vq->index]->msix_vector; @@ -674,6 +677,17 @@ static struct pci_driver virtio_pci_driver = { .sriov_configure = virtio_pci_sriov_configure, }; +struct virtio_device 
*virtio_pci_vf_get_pf_dev(struct pci_dev *pdev) +{ + struct virtio_pci_device *pf_vp_dev; + + pf_vp_dev = pci_iov_get_pf_drvdata(pdev, &virtio_pci_driver); + if (IS_ERR(pf_vp_dev)) + return NULL; + + return &pf_vp_dev->vdev; +} + module_pci_driver(virtio_pci_driver); MODULE_AUTHOR("Anthony Liguori "); diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index 4b773bd7c58cb7e42726127cdd188a181c8c04db..7fef52bee4557cc5cadf492ba6e49b76a00e3c3a 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -29,6 +29,7 @@ #include #include #include +#include struct virtio_pci_vq_info { /* the actual virtqueue */ @@ -41,6 +42,17 @@ struct virtio_pci_vq_info { unsigned int msix_vector; }; +struct virtio_pci_admin_vq { + /* Virtqueue info associated with this admin queue. */ + struct virtio_pci_vq_info info; + /* serializing admin commands execution and virtqueue deletion */ + struct mutex cmd_lock; + u64 supported_cmds; + /* Name of the admin queue: avq.$vq_index. 
*/ + char name[10]; + u16 vq_index; +}; + /* Our device structure */ struct virtio_pci_device { struct virtio_device vdev; @@ -58,9 +70,13 @@ struct virtio_pci_device { spinlock_t lock; struct list_head virtqueues; - /* array of all queues for house-keeping */ + /* Array of all virtqueues reported in the + * PCI common config num_queues field + */ struct virtio_pci_vq_info **vqs; + struct virtio_pci_admin_vq admin_vq; + /* MSI-X support */ int msix_enabled; int intx_enabled; @@ -86,6 +102,7 @@ struct virtio_pci_device { void (*del_vq)(struct virtio_pci_vq_info *info); u16 (*config_vector)(struct virtio_pci_device *vp_dev, u16 vector); + bool (*is_avq)(struct virtio_device *vdev, unsigned int index); }; /* Constants for MSI-X */ @@ -139,4 +156,27 @@ static inline void virtio_pci_legacy_remove(struct virtio_pci_device *vp_dev) int virtio_pci_modern_probe(struct virtio_pci_device *); void virtio_pci_modern_remove(struct virtio_pci_device *); +struct virtio_device *virtio_pci_vf_get_pf_dev(struct pci_dev *pdev); + +#define VIRTIO_LEGACY_ADMIN_CMD_BITMAP \ + (BIT_ULL(VIRTIO_ADMIN_CMD_LEGACY_COMMON_CFG_WRITE) | \ + BIT_ULL(VIRTIO_ADMIN_CMD_LEGACY_COMMON_CFG_READ) | \ + BIT_ULL(VIRTIO_ADMIN_CMD_LEGACY_DEV_CFG_WRITE) | \ + BIT_ULL(VIRTIO_ADMIN_CMD_LEGACY_DEV_CFG_READ) | \ + BIT_ULL(VIRTIO_ADMIN_CMD_LEGACY_NOTIFY_INFO)) + +/* Unlike modern drivers which support hardware virtio devices, legacy drivers + * assume software-based devices: e.g. they don't use proper memory barriers + * on ARM, use big endian on PPC, etc. X86 drivers are mostly ok though, more + * or less by chance. For now, only support legacy IO on X86. 
+ */ +#ifdef CONFIG_VIRTIO_PCI_ADMIN_LEGACY +#define VIRTIO_ADMIN_CMD_BITMAP VIRTIO_LEGACY_ADMIN_CMD_BITMAP +#else +#define VIRTIO_ADMIN_CMD_BITMAP 0 +#endif + +int vp_modern_admin_cmd_exec(struct virtio_device *vdev, + struct virtio_admin_cmd *cmd); + #endif diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index ee6a386d250b168bdd59153a62ddceb361c0af93..f62b530aa3b5b0e6d555ffef8e39c7bd5fda63a5 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -19,6 +19,8 @@ #define VIRTIO_RING_NO_LEGACY #include "virtio_pci_common.h" +#define VIRTIO_AVQ_SGS_MAX 4 + static u64 vp_get_features(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); @@ -26,6 +28,187 @@ static u64 vp_get_features(struct virtio_device *vdev) return vp_modern_get_features(&vp_dev->mdev); } +static bool vp_is_avq(struct virtio_device *vdev, unsigned int index) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + + if (!virtio_has_feature(vdev, VIRTIO_F_ADMIN_VQ)) + return false; + + return index == vp_dev->admin_vq.vq_index; +} + +static int virtqueue_exec_admin_cmd(struct virtio_pci_admin_vq *admin_vq, + u16 opcode, + struct scatterlist **sgs, + unsigned int out_num, + unsigned int in_num, + void *data) +{ + struct virtqueue *vq; + int ret, len; + + vq = admin_vq->info.vq; + if (!vq) + return -EIO; + + if (opcode != VIRTIO_ADMIN_CMD_LIST_QUERY && + opcode != VIRTIO_ADMIN_CMD_LIST_USE && + !((1ULL << opcode) & admin_vq->supported_cmds)) + return -EOPNOTSUPP; + + ret = virtqueue_add_sgs(vq, sgs, out_num, in_num, data, GFP_KERNEL); + if (ret < 0) + return -EIO; + + if (unlikely(!virtqueue_kick(vq))) + return -EIO; + + while (!virtqueue_get_buf(vq, &len) && + !virtqueue_is_broken(vq)) + cpu_relax(); + + if (virtqueue_is_broken(vq)) + return -EIO; + + return 0; +} + +int vp_modern_admin_cmd_exec(struct virtio_device *vdev, + struct virtio_admin_cmd *cmd) +{ + struct scatterlist 
*sgs[VIRTIO_AVQ_SGS_MAX], hdr, stat; + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + struct virtio_admin_cmd_status *va_status; + unsigned int out_num = 0, in_num = 0; + struct virtio_admin_cmd_hdr *va_hdr; + u16 status; + int ret; + + if (!virtio_has_feature(vdev, VIRTIO_F_ADMIN_VQ)) + return -EOPNOTSUPP; + + va_status = kzalloc(sizeof(*va_status), GFP_KERNEL); + if (!va_status) + return -ENOMEM; + + va_hdr = kzalloc(sizeof(*va_hdr), GFP_KERNEL); + if (!va_hdr) { + ret = -ENOMEM; + goto err_alloc; + } + + va_hdr->opcode = cmd->opcode; + va_hdr->group_type = cmd->group_type; + va_hdr->group_member_id = cmd->group_member_id; + + /* Add header */ + sg_init_one(&hdr, va_hdr, sizeof(*va_hdr)); + sgs[out_num] = &hdr; + out_num++; + + if (cmd->data_sg) { + sgs[out_num] = cmd->data_sg; + out_num++; + } + + /* Add return status */ + sg_init_one(&stat, va_status, sizeof(*va_status)); + sgs[out_num + in_num] = &stat; + in_num++; + + if (cmd->result_sg) { + sgs[out_num + in_num] = cmd->result_sg; + in_num++; + } + + mutex_lock(&vp_dev->admin_vq.cmd_lock); + ret = virtqueue_exec_admin_cmd(&vp_dev->admin_vq, + le16_to_cpu(cmd->opcode), + sgs, out_num, in_num, sgs); + mutex_unlock(&vp_dev->admin_vq.cmd_lock); + + if (ret) { + dev_err(&vdev->dev, + "Failed to execute command on admin vq: %d\n.", ret); + goto err_cmd_exec; + } + + status = le16_to_cpu(va_status->status); + if (status != VIRTIO_ADMIN_STATUS_OK) { + dev_err(&vdev->dev, + "admin command error: status(%#x) qualifier(%#x)\n", + status, le16_to_cpu(va_status->status_qualifier)); + ret = -status; + } + +err_cmd_exec: + kfree(va_hdr); +err_alloc: + kfree(va_status); + return ret; +} + +static void virtio_pci_admin_cmd_list_init(struct virtio_device *virtio_dev) +{ + struct virtio_pci_device *vp_dev = to_vp_device(virtio_dev); + struct virtio_admin_cmd cmd = {}; + struct scatterlist result_sg; + struct scatterlist data_sg; + __le64 *data; + int ret; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + 
return; + + sg_init_one(&result_sg, data, sizeof(*data)); + cmd.opcode = cpu_to_le16(VIRTIO_ADMIN_CMD_LIST_QUERY); + cmd.group_type = cpu_to_le16(VIRTIO_ADMIN_GROUP_TYPE_SRIOV); + cmd.result_sg = &result_sg; + + ret = vp_modern_admin_cmd_exec(virtio_dev, &cmd); + if (ret) + goto end; + + *data &= cpu_to_le64(VIRTIO_ADMIN_CMD_BITMAP); + sg_init_one(&data_sg, data, sizeof(*data)); + cmd.opcode = cpu_to_le16(VIRTIO_ADMIN_CMD_LIST_USE); + cmd.data_sg = &data_sg; + cmd.result_sg = NULL; + + ret = vp_modern_admin_cmd_exec(virtio_dev, &cmd); + if (ret) + goto end; + + vp_dev->admin_vq.supported_cmds = le64_to_cpu(*data); +end: + kfree(data); +} + +static void vp_modern_avq_activate(struct virtio_device *vdev) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + struct virtio_pci_admin_vq *admin_vq = &vp_dev->admin_vq; + + if (!virtio_has_feature(vdev, VIRTIO_F_ADMIN_VQ)) + return; + + __virtqueue_unbreak(admin_vq->info.vq); + virtio_pci_admin_cmd_list_init(vdev); +} + +static void vp_modern_avq_deactivate(struct virtio_device *vdev) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + struct virtio_pci_admin_vq *admin_vq = &vp_dev->admin_vq; + + if (!virtio_has_feature(vdev, VIRTIO_F_ADMIN_VQ)) + return; + + __virtqueue_break(admin_vq->info.vq); +} + static void vp_transport_features(struct virtio_device *vdev, u64 features) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); @@ -37,6 +220,9 @@ static void vp_transport_features(struct virtio_device *vdev, u64 features) if (features & BIT_ULL(VIRTIO_F_RING_RESET)) __virtio_set_bit(vdev, VIRTIO_F_RING_RESET); + + if (features & BIT_ULL(VIRTIO_F_ADMIN_VQ)) + __virtio_set_bit(vdev, VIRTIO_F_ADMIN_VQ); } static int __vp_check_common_size_one_feature(struct virtio_device *vdev, u32 fbit, @@ -69,6 +255,9 @@ static int vp_check_common_size(struct virtio_device *vdev) if (vp_check_common_size_one_feature(vdev, VIRTIO_F_RING_RESET, queue_reset)) return -EINVAL; + if (vp_check_common_size_one_feature(vdev, 
VIRTIO_F_ADMIN_VQ, admin_queue_num)) + return -EINVAL; + return 0; } @@ -195,6 +384,8 @@ static void vp_set_status(struct virtio_device *vdev, u8 status) /* We should never be setting status to 0. */ BUG_ON(status == 0); vp_modern_set_status(&vp_dev->mdev, status); + if (status & VIRTIO_CONFIG_S_DRIVER_OK) + vp_modern_avq_activate(vdev); } static void vp_reset(struct virtio_device *vdev) @@ -211,6 +402,9 @@ static void vp_reset(struct virtio_device *vdev) */ while (vp_modern_get_status(mdev)) msleep(1); + + vp_modern_avq_deactivate(vdev); + /* Flush pending VQ/configuration callbacks. */ vp_synchronize_vectors(vdev); } @@ -345,6 +539,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, struct virtio_pci_modern_device *mdev = &vp_dev->mdev; bool (*notify)(struct virtqueue *vq); struct virtqueue *vq; + bool is_avq; u16 num; int err; @@ -353,11 +548,13 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, else notify = vp_notify; - if (index >= vp_modern_get_num_queues(mdev)) + is_avq = vp_is_avq(&vp_dev->vdev, index); + if (index >= vp_modern_get_num_queues(mdev) && !is_avq) return ERR_PTR(-EINVAL); + num = is_avq ? + VIRTIO_AVQ_SGS_MAX : vp_modern_get_queue_size(mdev, index); /* Check if queue is either not available or already active. 
*/ - num = vp_modern_get_queue_size(mdev, index); if (!num || vp_modern_get_queue_enable(mdev, index)) return ERR_PTR(-ENOENT); @@ -383,6 +580,12 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, goto err; } + if (is_avq) { + mutex_lock(&vp_dev->admin_vq.cmd_lock); + vp_dev->admin_vq.info.vq = vq; + mutex_unlock(&vp_dev->admin_vq.cmd_lock); + } + return vq; err: @@ -418,6 +621,12 @@ static void del_vq(struct virtio_pci_vq_info *info) struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); struct virtio_pci_modern_device *mdev = &vp_dev->mdev; + if (vp_is_avq(&vp_dev->vdev, vq->index)) { + mutex_lock(&vp_dev->admin_vq.cmd_lock); + vp_dev->admin_vq.info.vq = NULL; + mutex_unlock(&vp_dev->admin_vq.cmd_lock); + } + if (vp_dev->msix_enabled) vp_modern_queue_vector(mdev, vq->index, VIRTIO_MSI_NO_VECTOR); @@ -527,6 +736,45 @@ static bool vp_get_shm_region(struct virtio_device *vdev, return true; } +static int vp_modern_create_avq(struct virtio_device *vdev) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + struct virtio_pci_admin_vq *avq; + struct virtqueue *vq; + u16 admin_q_num; + + if (!virtio_has_feature(vdev, VIRTIO_F_ADMIN_VQ)) + return 0; + + admin_q_num = vp_modern_avq_num(&vp_dev->mdev); + if (!admin_q_num) + return -EINVAL; + + avq = &vp_dev->admin_vq; + avq->vq_index = vp_modern_avq_index(&vp_dev->mdev); + sprintf(avq->name, "avq.%u", avq->vq_index); + vq = vp_dev->setup_vq(vp_dev, &vp_dev->admin_vq.info, avq->vq_index, NULL, + avq->name, NULL, VIRTIO_MSI_NO_VECTOR); + if (IS_ERR(vq)) { + dev_err(&vdev->dev, "failed to setup admin virtqueue, err=%ld", + PTR_ERR(vq)); + return PTR_ERR(vq); + } + + vp_modern_set_queue_enable(&vp_dev->mdev, avq->info.vq->index, true); + return 0; +} + +static void vp_modern_destroy_avq(struct virtio_device *vdev) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + + if (!virtio_has_feature(vdev, VIRTIO_F_ADMIN_VQ)) + return; + + vp_dev->del_vq(&vp_dev->admin_vq.info); +} + static const 
struct virtio_config_ops virtio_pci_config_nodev_ops = { .get = NULL, .set = NULL, @@ -545,6 +793,8 @@ static const struct virtio_config_ops virtio_pci_config_nodev_ops = { .get_shm_region = vp_get_shm_region, .disable_vq_and_reset = vp_modern_disable_vq_and_reset, .enable_vq_after_reset = vp_modern_enable_vq_after_reset, + .create_avq = vp_modern_create_avq, + .destroy_avq = vp_modern_destroy_avq, }; static const struct virtio_config_ops virtio_pci_config_ops = { @@ -565,6 +815,8 @@ static const struct virtio_config_ops virtio_pci_config_ops = { .get_shm_region = vp_get_shm_region, .disable_vq_and_reset = vp_modern_disable_vq_and_reset, .enable_vq_after_reset = vp_modern_enable_vq_after_reset, + .create_avq = vp_modern_create_avq, + .destroy_avq = vp_modern_destroy_avq, }; /* the PCI probing function */ @@ -588,9 +840,11 @@ int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev) vp_dev->config_vector = vp_config_vector; vp_dev->setup_vq = setup_vq; vp_dev->del_vq = del_vq; + vp_dev->is_avq = vp_is_avq; vp_dev->isr = mdev->isr; vp_dev->vdev.id = mdev->id; + mutex_init(&vp_dev->admin_vq.cmd_lock); return 0; } @@ -598,5 +852,6 @@ void virtio_pci_modern_remove(struct virtio_pci_device *vp_dev) { struct virtio_pci_modern_device *mdev = &vp_dev->mdev; + mutex_destroy(&vp_dev->admin_vq.cmd_lock); vp_modern_remove(mdev); } diff --git a/drivers/virtio/virtio_pci_modern_dev.c b/drivers/virtio/virtio_pci_modern_dev.c index 7de8b1ebabac4217b2240f6d8faaf486b4265f38..0d3dbfaf4b236910530b967a3fb9de25b68bcd36 100644 --- a/drivers/virtio/virtio_pci_modern_dev.c +++ b/drivers/virtio/virtio_pci_modern_dev.c @@ -207,6 +207,10 @@ static inline void check_offsets(void) offsetof(struct virtio_pci_modern_common_cfg, queue_notify_data)); BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_RESET != offsetof(struct virtio_pci_modern_common_cfg, queue_reset)); + BUILD_BUG_ON(VIRTIO_PCI_COMMON_ADM_Q_IDX != + offsetof(struct virtio_pci_modern_common_cfg, admin_queue_index)); + 
BUILD_BUG_ON(VIRTIO_PCI_COMMON_ADM_Q_NUM != + offsetof(struct virtio_pci_modern_common_cfg, admin_queue_num)); } /* @@ -296,7 +300,7 @@ int vp_modern_probe(struct virtio_pci_modern_device *mdev) mdev->common = vp_modern_map_capability(mdev, common, sizeof(struct virtio_pci_common_cfg), 4, 0, offsetofend(struct virtio_pci_modern_common_cfg, - queue_reset), + admin_queue_num), &mdev->common_len, NULL); if (!mdev->common) goto err_map_common; @@ -719,6 +723,24 @@ void __iomem *vp_modern_map_vq_notify(struct virtio_pci_modern_device *mdev, } EXPORT_SYMBOL_GPL(vp_modern_map_vq_notify); +u16 vp_modern_avq_num(struct virtio_pci_modern_device *mdev) +{ + struct virtio_pci_modern_common_cfg __iomem *cfg; + + cfg = (struct virtio_pci_modern_common_cfg __iomem *)mdev->common; + return vp_ioread16(&cfg->admin_queue_num); +} +EXPORT_SYMBOL_GPL(vp_modern_avq_num); + +u16 vp_modern_avq_index(struct virtio_pci_modern_device *mdev) +{ + struct virtio_pci_modern_common_cfg __iomem *cfg; + + cfg = (struct virtio_pci_modern_common_cfg __iomem *)mdev->common; + return vp_ioread16(&cfg->admin_queue_index); +} +EXPORT_SYMBOL_GPL(vp_modern_avq_index); + MODULE_VERSION("0.1"); MODULE_DESCRIPTION("Modern Virtio PCI Device"); MODULE_AUTHOR("Jason Wang "); diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h index 731e3d14b67d360e3ac2f14b2c139d38d4b4caae..0e8418066a482f5ce6332372b3af1259ef02237a 100644 --- a/fs/9p/v9fs_vfs.h +++ b/fs/9p/v9fs_vfs.h @@ -42,6 +42,7 @@ struct inode *v9fs_alloc_inode(struct super_block *sb); void v9fs_free_inode(struct inode *inode); struct inode *v9fs_get_inode(struct super_block *sb, umode_t mode, dev_t rdev); +void v9fs_set_netfs_context(struct inode *inode); int v9fs_init_inode(struct v9fs_session_info *v9ses, struct inode *inode, umode_t mode, dev_t rdev); void v9fs_evict_inode(struct inode *inode); diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index 8a635999a7d617ee4920854f9108d93855bb55ab..047855033d32f73f054a074452622499d5cf983c 100644 --- 
a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -19,12 +19,45 @@ #include #include #include +#include #include "v9fs.h" #include "v9fs_vfs.h" #include "cache.h" #include "fid.h" +static void v9fs_upload_to_server(struct netfs_io_subrequest *subreq) +{ + struct p9_fid *fid = subreq->rreq->netfs_priv; + int err, len; + + trace_netfs_sreq(subreq, netfs_sreq_trace_submit); + len = p9_client_write(fid, subreq->start, &subreq->io_iter, &err); + netfs_write_subrequest_terminated(subreq, len ?: err, false); +} + +static void v9fs_upload_to_server_worker(struct work_struct *work) +{ + struct netfs_io_subrequest *subreq = + container_of(work, struct netfs_io_subrequest, work); + + v9fs_upload_to_server(subreq); +} + +/* + * Set up write requests for a writeback slice. We need to add a write request + * for each write we want to make. + */ +static void v9fs_create_write_requests(struct netfs_io_request *wreq, loff_t start, size_t len) +{ + struct netfs_io_subrequest *subreq; + + subreq = netfs_create_write_request(wreq, NETFS_UPLOAD_TO_SERVER, + start, len, v9fs_upload_to_server_worker); + if (subreq) + netfs_queue_write_request(subreq); +} + /** * v9fs_issue_read - Issue a read from 9P * @subreq: The read to make @@ -33,14 +66,10 @@ static void v9fs_issue_read(struct netfs_io_subrequest *subreq) { struct netfs_io_request *rreq = subreq->rreq; struct p9_fid *fid = rreq->netfs_priv; - struct iov_iter to; - loff_t pos = subreq->start + subreq->transferred; - size_t len = subreq->len - subreq->transferred; int total, err; - iov_iter_xarray(&to, ITER_DEST, &rreq->mapping->i_pages, pos, len); - - total = p9_client_read(fid, pos, &to, &err); + total = p9_client_read(fid, subreq->start + subreq->transferred, + &subreq->io_iter, &err); /* if we just extended the file size, any portion not in * cache won't be on server and is zeroes */ @@ -50,25 +79,42 @@ static void v9fs_issue_read(struct netfs_io_subrequest *subreq) } /** - * v9fs_init_request - Initialise a read request + * 
v9fs_init_request - Initialise a request * @rreq: The read request * @file: The file being read from */ static int v9fs_init_request(struct netfs_io_request *rreq, struct file *file) { - struct p9_fid *fid = file->private_data; - - BUG_ON(!fid); + struct p9_fid *fid; + bool writing = (rreq->origin == NETFS_READ_FOR_WRITE || + rreq->origin == NETFS_WRITEBACK || + rreq->origin == NETFS_WRITETHROUGH || + rreq->origin == NETFS_LAUNDER_WRITE || + rreq->origin == NETFS_UNBUFFERED_WRITE || + rreq->origin == NETFS_DIO_WRITE); + + if (file) { + fid = file->private_data; + if (!fid) + goto no_fid; + p9_fid_get(fid); + } else { + fid = v9fs_fid_find_inode(rreq->inode, writing, INVALID_UID, true); + if (!fid) + goto no_fid; + } /* we might need to read from a fid that was opened write-only * for read-modify-write of page cache, use the writeback fid * for that */ - WARN_ON(rreq->origin == NETFS_READ_FOR_WRITE && - !(fid->mode & P9_ORDWR)); - - p9_fid_get(fid); + WARN_ON(rreq->origin == NETFS_READ_FOR_WRITE && !(fid->mode & P9_ORDWR)); rreq->netfs_priv = fid; return 0; + +no_fid: + WARN_ONCE(1, "folio expected an open fid inode->i_ino=%lx\n", + rreq->inode->i_ino); + return -EINVAL; } /** @@ -82,281 +128,20 @@ static void v9fs_free_request(struct netfs_io_request *rreq) p9_fid_put(fid); } -/** - * v9fs_begin_cache_operation - Begin a cache operation for a read - * @rreq: The read request - */ -static int v9fs_begin_cache_operation(struct netfs_io_request *rreq) -{ -#ifdef CONFIG_9P_FSCACHE - struct fscache_cookie *cookie = v9fs_inode_cookie(V9FS_I(rreq->inode)); - - return fscache_begin_read_operation(&rreq->cache_resources, cookie); -#else - return -ENOBUFS; -#endif -} - const struct netfs_request_ops v9fs_req_ops = { .init_request = v9fs_init_request, .free_request = v9fs_free_request, - .begin_cache_operation = v9fs_begin_cache_operation, .issue_read = v9fs_issue_read, + .create_write_requests = v9fs_create_write_requests, }; -/** - * v9fs_release_folio - release the private 
state associated with a folio - * @folio: The folio to be released - * @gfp: The caller's allocation restrictions - * - * Returns true if the page can be released, false otherwise. - */ - -static bool v9fs_release_folio(struct folio *folio, gfp_t gfp) -{ - if (folio_test_private(folio)) - return false; -#ifdef CONFIG_9P_FSCACHE - if (folio_test_fscache(folio)) { - if (current_is_kswapd() || !(gfp & __GFP_FS)) - return false; - folio_wait_fscache(folio); - } - fscache_note_page_release(v9fs_inode_cookie(V9FS_I(folio_inode(folio)))); -#endif - return true; -} - -static void v9fs_invalidate_folio(struct folio *folio, size_t offset, - size_t length) -{ - folio_wait_fscache(folio); -} - -#ifdef CONFIG_9P_FSCACHE -static void v9fs_write_to_cache_done(void *priv, ssize_t transferred_or_error, - bool was_async) -{ - struct v9fs_inode *v9inode = priv; - __le32 version; - - if (IS_ERR_VALUE(transferred_or_error) && - transferred_or_error != -ENOBUFS) { - version = cpu_to_le32(v9inode->qid.version); - fscache_invalidate(v9fs_inode_cookie(v9inode), &version, - i_size_read(&v9inode->netfs.inode), 0); - } -} -#endif - -static int v9fs_vfs_write_folio_locked(struct folio *folio) -{ - struct inode *inode = folio_inode(folio); - loff_t start = folio_pos(folio); - loff_t i_size = i_size_read(inode); - struct iov_iter from; - size_t len = folio_size(folio); - struct p9_fid *writeback_fid; - int err; - struct v9fs_inode __maybe_unused *v9inode = V9FS_I(inode); - struct fscache_cookie __maybe_unused *cookie = v9fs_inode_cookie(v9inode); - - if (start >= i_size) - return 0; /* Simultaneous truncation occurred */ - - len = min_t(loff_t, i_size - start, len); - - iov_iter_xarray(&from, ITER_SOURCE, &folio_mapping(folio)->i_pages, start, len); - - writeback_fid = v9fs_fid_find_inode(inode, true, INVALID_UID, true); - if (!writeback_fid) { - WARN_ONCE(1, "folio expected an open fid inode->i_private=%p\n", - inode->i_private); - return -EINVAL; - } - - folio_wait_fscache(folio); - 
folio_start_writeback(folio); - - p9_client_write(writeback_fid, start, &from, &err); - -#ifdef CONFIG_9P_FSCACHE - if (err == 0 && - fscache_cookie_enabled(cookie) && - test_bit(FSCACHE_COOKIE_IS_CACHING, &cookie->flags)) { - folio_start_fscache(folio); - fscache_write_to_cache(v9fs_inode_cookie(v9inode), - folio_mapping(folio), start, len, i_size, - v9fs_write_to_cache_done, v9inode, - true); - } -#endif - - folio_end_writeback(folio); - p9_fid_put(writeback_fid); - - return err; -} - -static int v9fs_vfs_writepage(struct page *page, struct writeback_control *wbc) -{ - struct folio *folio = page_folio(page); - int retval; - - p9_debug(P9_DEBUG_VFS, "folio %p\n", folio); - - retval = v9fs_vfs_write_folio_locked(folio); - if (retval < 0) { - if (retval == -EAGAIN) { - folio_redirty_for_writepage(wbc, folio); - retval = 0; - } else { - mapping_set_error(folio_mapping(folio), retval); - } - } else - retval = 0; - - folio_unlock(folio); - return retval; -} - -static int v9fs_launder_folio(struct folio *folio) -{ - int retval; - - if (folio_clear_dirty_for_io(folio)) { - retval = v9fs_vfs_write_folio_locked(folio); - if (retval) - return retval; - } - folio_wait_fscache(folio); - return 0; -} - -/** - * v9fs_direct_IO - 9P address space operation for direct I/O - * @iocb: target I/O control block - * @iter: The data/buffer to use - * - * The presence of v9fs_direct_IO() in the address space ops vector - * allowes open() O_DIRECT flags which would have failed otherwise. - * - * In the non-cached mode, we shunt off direct read and write requests before - * the VFS gets them, so this method should never be called. - * - * Direct IO is not 'yet' supported in the cached mode. Hence when - * this routine is called through generic_file_aio_read(), the read/write fails - * with an error. 
- * - */ -static ssize_t -v9fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) -{ - struct file *file = iocb->ki_filp; - loff_t pos = iocb->ki_pos; - ssize_t n; - int err = 0; - - if (iov_iter_rw(iter) == WRITE) { - n = p9_client_write(file->private_data, pos, iter, &err); - if (n) { - struct inode *inode = file_inode(file); - loff_t i_size = i_size_read(inode); - - if (pos + n > i_size) - inode_add_bytes(inode, pos + n - i_size); - } - } else { - n = p9_client_read(file->private_data, pos, iter, &err); - } - return n ? n : err; -} - -static int v9fs_write_begin(struct file *filp, struct address_space *mapping, - loff_t pos, unsigned int len, - struct page **subpagep, void **fsdata) -{ - int retval; - struct folio *folio; - struct v9fs_inode *v9inode = V9FS_I(mapping->host); - - p9_debug(P9_DEBUG_VFS, "filp %p, mapping %p\n", filp, mapping); - - /* Prefetch area to be written into the cache if we're caching this - * file. We need to do this before we get a lock on the page in case - * there's more than one writer competing for the same cache block. - */ - retval = netfs_write_begin(&v9inode->netfs, filp, mapping, pos, len, &folio, fsdata); - if (retval < 0) - return retval; - - *subpagep = &folio->page; - return retval; -} - -static int v9fs_write_end(struct file *filp, struct address_space *mapping, - loff_t pos, unsigned int len, unsigned int copied, - struct page *subpage, void *fsdata) -{ - loff_t last_pos = pos + copied; - struct folio *folio = page_folio(subpage); - struct inode *inode = mapping->host; - - p9_debug(P9_DEBUG_VFS, "filp %p, mapping %p\n", filp, mapping); - - if (!folio_test_uptodate(folio)) { - if (unlikely(copied < len)) { - copied = 0; - goto out; - } - - folio_mark_uptodate(folio); - } - - /* - * No need to use i_size_read() here, the i_size - * cannot change under us because we hold the i_mutex. 
- */ - if (last_pos > inode->i_size) { - inode_add_bytes(inode, last_pos - inode->i_size); - i_size_write(inode, last_pos); -#ifdef CONFIG_9P_FSCACHE - fscache_update_cookie(v9fs_inode_cookie(V9FS_I(inode)), NULL, - &last_pos); -#endif - } - folio_mark_dirty(folio); -out: - folio_unlock(folio); - folio_put(folio); - - return copied; -} - -#ifdef CONFIG_9P_FSCACHE -/* - * Mark a page as having been made dirty and thus needing writeback. We also - * need to pin the cache object to write back to. - */ -static bool v9fs_dirty_folio(struct address_space *mapping, struct folio *folio) -{ - struct v9fs_inode *v9inode = V9FS_I(mapping->host); - - return fscache_dirty_folio(mapping, folio, v9fs_inode_cookie(v9inode)); -} -#else -#define v9fs_dirty_folio filemap_dirty_folio -#endif - const struct address_space_operations v9fs_addr_operations = { - .read_folio = netfs_read_folio, - .readahead = netfs_readahead, - .dirty_folio = v9fs_dirty_folio, - .writepage = v9fs_vfs_writepage, - .write_begin = v9fs_write_begin, - .write_end = v9fs_write_end, - .release_folio = v9fs_release_folio, - .invalidate_folio = v9fs_invalidate_folio, - .launder_folio = v9fs_launder_folio, - .direct_IO = v9fs_direct_IO, + .read_folio = netfs_read_folio, + .readahead = netfs_readahead, + .dirty_folio = netfs_dirty_folio, + .release_folio = netfs_release_folio, + .invalidate_folio = netfs_invalidate_folio, + .launder_folio = netfs_launder_folio, + .direct_IO = noop_direct_IO, + .writepages = netfs_writepages, }; diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 11cd8d23f6f2384dfee0ee96e85ba894f373bbe3..bae330c2f0cf07d207af8c193dad15a78703793a 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -353,25 +353,15 @@ static ssize_t v9fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct p9_fid *fid = iocb->ki_filp->private_data; - int ret, err = 0; p9_debug(P9_DEBUG_VFS, "fid %d count %zu offset %lld\n", fid->fid, iov_iter_count(to), iocb->ki_pos); - if (!(fid->mode & P9L_DIRECT)) { - 
p9_debug(P9_DEBUG_VFS, "(cached)\n"); - return generic_file_read_iter(iocb, to); - } - - if (iocb->ki_filp->f_flags & O_NONBLOCK) - ret = p9_client_read_once(fid, iocb->ki_pos, to, &err); - else - ret = p9_client_read(fid, iocb->ki_pos, to, &err); - if (!ret) - return err; + if (fid->mode & P9L_DIRECT) + return netfs_unbuffered_read_iter(iocb, to); - iocb->ki_pos += ret; - return ret; + p9_debug(P9_DEBUG_VFS, "(cached)\n"); + return netfs_file_read_iter(iocb, to); } /* @@ -407,46 +397,14 @@ v9fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct p9_fid *fid = file->private_data; - ssize_t retval; - loff_t origin; - int err = 0; p9_debug(P9_DEBUG_VFS, "fid %d\n", fid->fid); - if (!(fid->mode & (P9L_DIRECT | P9L_NOWRITECACHE))) { - p9_debug(P9_DEBUG_CACHE, "(cached)\n"); - return generic_file_write_iter(iocb, from); - } + if (fid->mode & (P9L_DIRECT | P9L_NOWRITECACHE)) + return netfs_unbuffered_write_iter(iocb, from); - retval = generic_write_checks(iocb, from); - if (retval <= 0) - return retval; - - origin = iocb->ki_pos; - retval = p9_client_write(file->private_data, iocb->ki_pos, from, &err); - if (retval > 0) { - struct inode *inode = file_inode(file); - loff_t i_size; - unsigned long pg_start, pg_end; - - pg_start = origin >> PAGE_SHIFT; - pg_end = (origin + retval - 1) >> PAGE_SHIFT; - if (inode->i_mapping && inode->i_mapping->nrpages) - invalidate_inode_pages2_range(inode->i_mapping, - pg_start, pg_end); - iocb->ki_pos += retval; - i_size = i_size_read(inode); - if (iocb->ki_pos > i_size) { - inode_add_bytes(inode, iocb->ki_pos - i_size); - /* - * Need to serialize against i_size_write() in - * v9fs_stat2inode() - */ - v9fs_i_size_write(inode, iocb->ki_pos); - } - return retval; - } - return err; + p9_debug(P9_DEBUG_CACHE, "(cached)\n"); + return netfs_file_write_iter(iocb, from); } static int v9fs_file_fsync(struct file *filp, loff_t start, loff_t end, @@ -519,36 +477,7 @@ v9fs_file_mmap(struct file *filp, 
struct vm_area_struct *vma) static vm_fault_t v9fs_vm_page_mkwrite(struct vm_fault *vmf) { - struct folio *folio = page_folio(vmf->page); - struct file *filp = vmf->vma->vm_file; - struct inode *inode = file_inode(filp); - - - p9_debug(P9_DEBUG_VFS, "folio %p fid %lx\n", - folio, (unsigned long)filp->private_data); - - /* Wait for the page to be written to the cache before we allow it to - * be modified. We then assume the entire page will need writing back. - */ -#ifdef CONFIG_9P_FSCACHE - if (folio_test_fscache(folio) && - folio_wait_fscache_killable(folio) < 0) - return VM_FAULT_NOPAGE; -#endif - - /* Update file times before taking page lock */ - file_update_time(filp); - - if (folio_lock_killable(folio) < 0) - return VM_FAULT_RETRY; - if (folio_mapping(folio) != inode->i_mapping) - goto out_unlock; - folio_wait_stable(folio); - - return VM_FAULT_LOCKED; -out_unlock: - folio_unlock(folio); - return VM_FAULT_NOPAGE; + return netfs_page_mkwrite(vmf, NULL); } static void v9fs_mmap_vm_close(struct vm_area_struct *vma) diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index b845ee18a80be7a1aac30fab226f45a7ae02f343..32572982f72e68a6db3967d9ab9ba9d51c8bae9c 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -246,10 +246,10 @@ void v9fs_free_inode(struct inode *inode) /* * Set parameters for the netfs library */ -static void v9fs_set_netfs_context(struct inode *inode) +void v9fs_set_netfs_context(struct inode *inode) { struct v9fs_inode *v9inode = V9FS_I(inode); - netfs_inode_init(&v9inode->netfs, &v9fs_req_ops); + netfs_inode_init(&v9inode->netfs, &v9fs_req_ops, true); } int v9fs_init_inode(struct v9fs_session_info *v9ses, @@ -326,8 +326,6 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses, err = -EINVAL; goto error; } - - v9fs_set_netfs_context(inode); error: return err; @@ -359,6 +357,7 @@ struct inode *v9fs_get_inode(struct super_block *sb, umode_t mode, dev_t rdev) iput(inode); return ERR_PTR(err); } + v9fs_set_netfs_context(inode); return inode; } @@ 
-374,11 +373,8 @@ void v9fs_evict_inode(struct inode *inode) truncate_inode_pages_final(&inode->i_data); -#ifdef CONFIG_9P_FSCACHE version = cpu_to_le32(v9inode->qid.version); - fscache_clear_inode_writeback(v9fs_inode_cookie(v9inode), inode, - &version); -#endif + netfs_clear_inode_writeback(inode, &version); clear_inode(inode); filemap_fdatawrite(&inode->i_data); @@ -464,6 +460,7 @@ static struct inode *v9fs_qid_iget(struct super_block *sb, goto error; v9fs_stat2inode(st, inode, sb, 0); + v9fs_set_netfs_context(inode); v9fs_cache_inode_get_cookie(inode); unlock_new_inode(inode); return inode; @@ -1113,7 +1110,7 @@ static int v9fs_vfs_setattr(struct mnt_idmap *idmap, if ((iattr->ia_valid & ATTR_SIZE) && iattr->ia_size != i_size_read(inode)) { truncate_setsize(inode, iattr->ia_size); - truncate_pagecache(inode, iattr->ia_size); + netfs_resize_file(netfs_inode(inode), iattr->ia_size, true); #ifdef CONFIG_9P_FSCACHE if (v9ses->cache & CACHE_FSCACHE) { @@ -1181,6 +1178,7 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode, mode |= inode->i_mode & ~S_IALLUGO; inode->i_mode = mode; + v9inode->netfs.remote_i_size = stat->length; if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE)) v9fs_i_size_write(inode, stat->length); /* not real number of blocks, but 512 byte ones ... 
*/ diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index c7319af2f4711e5686f453262b6ce3eeee9e651f..3505227e170402be03b2df40ff8f3ba2994a07d2 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -128,6 +128,7 @@ static struct inode *v9fs_qid_iget_dotl(struct super_block *sb, goto error; v9fs_stat2inode_dotl(st, inode, 0); + v9fs_set_netfs_context(inode); v9fs_cache_inode_get_cookie(inode); retval = v9fs_get_acl(inode, fid); if (retval) @@ -598,7 +599,7 @@ int v9fs_vfs_setattr_dotl(struct mnt_idmap *idmap, if ((iattr->ia_valid & ATTR_SIZE) && iattr->ia_size != i_size_read(inode)) { truncate_setsize(inode, iattr->ia_size); - truncate_pagecache(inode, iattr->ia_size); + netfs_resize_file(netfs_inode(inode), iattr->ia_size, true); #ifdef CONFIG_9P_FSCACHE if (v9ses->cache & CACHE_FSCACHE) @@ -655,6 +656,7 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode, mode |= inode->i_mode & ~S_IALLUGO; inode->i_mode = mode; + v9inode->netfs.remote_i_size = stat->st_size; if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE)) v9fs_i_size_write(inode, stat->st_size); inode->i_blocks = stat->st_blocks; @@ -683,8 +685,10 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode, inode->i_mode = mode; } if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE) && - stat->st_result_mask & P9_STATS_SIZE) + stat->st_result_mask & P9_STATS_SIZE) { + v9inode->netfs.remote_i_size = stat->st_size; v9fs_i_size_write(inode, stat->st_size); + } if (stat->st_result_mask & P9_STATS_BLOCKS) inode->i_blocks = stat->st_blocks; } diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 73db55c050bf10b60137182d0cb639cb72561779..941f7d0e0bfa27e67aa34a9c7eebec3a65fc6f99 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -289,31 +289,21 @@ static int v9fs_drop_inode(struct inode *inode) static int v9fs_write_inode(struct inode *inode, struct writeback_control *wbc) { - struct v9fs_inode *v9inode; - /* * send an fsync request to server irrespective of * 
wbc->sync_mode. */ p9_debug(P9_DEBUG_VFS, "%s: inode %p\n", __func__, inode); - - v9inode = V9FS_I(inode); - fscache_unpin_writeback(wbc, v9fs_inode_cookie(v9inode)); - - return 0; + return netfs_unpin_writeback(inode, wbc); } static int v9fs_write_inode_dotl(struct inode *inode, struct writeback_control *wbc) { - struct v9fs_inode *v9inode; - v9inode = V9FS_I(inode); p9_debug(P9_DEBUG_VFS, "%s: inode %p\n", __func__, inode); - fscache_unpin_writeback(wbc, v9fs_inode_cookie(v9inode)); - - return 0; + return netfs_unpin_writeback(inode, wbc); } static const struct super_operations v9fs_super_ops = { diff --git a/fs/Kconfig b/fs/Kconfig index 34967d59280dac781d85d68d83448319139aac2d..096833f498a288f992fedc171ac2170727eaf31d 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -145,7 +145,6 @@ source "fs/incfs/Kconfig" menu "Caches" source "fs/netfs/Kconfig" -source "fs/fscache/Kconfig" source "fs/cachefiles/Kconfig" endmenu diff --git a/fs/Makefile b/fs/Makefile index 69f09c8766490ad4aa1f874df658f053a22f15c9..058d1d2b12179ca4e567e612570053f1b2b0fafa 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -61,7 +61,6 @@ obj-$(CONFIG_DLM) += dlm/ # Do not add any filesystems before this line obj-$(CONFIG_NETFS_SUPPORT) += netfs/ -obj-$(CONFIG_FSCACHE) += fscache/ obj-$(CONFIG_REISERFS_FS) += reiserfs/ obj-$(CONFIG_EXT4_FS) += ext4/ # We place ext4 before ext2 so that clean ext3 root fs's do NOT mount using the diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c index 2cd40ba601f1cd45a8f80106b8c550cb5e3d94a7..d3bc4a2d708519624673be4fd0572e9080da732c 100644 --- a/fs/afs/dynroot.c +++ b/fs/afs/dynroot.c @@ -76,7 +76,7 @@ struct inode *afs_iget_pseudo_dir(struct super_block *sb, bool root) /* there shouldn't be an existing inode */ BUG_ON(!(inode->i_state & I_NEW)); - netfs_inode_init(&vnode->netfs, NULL); + netfs_inode_init(&vnode->netfs, NULL, false); inode->i_size = 0; inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; if (root) { diff --git a/fs/afs/file.c b/fs/afs/file.c index 
30914e0d9cb29903cd42aacc9da6c3088f52f78a..3d33b221d9ca256a3b3d978a835d2db9fff2e284 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -20,9 +20,6 @@ static int afs_file_mmap(struct file *file, struct vm_area_struct *vma); static int afs_symlink_read_folio(struct file *file, struct folio *folio); -static void afs_invalidate_folio(struct folio *folio, size_t offset, - size_t length); -static bool afs_release_folio(struct folio *folio, gfp_t gfp_flags); static ssize_t afs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter); static ssize_t afs_file_splice_read(struct file *in, loff_t *ppos, @@ -37,7 +34,7 @@ const struct file_operations afs_file_operations = { .release = afs_release, .llseek = generic_file_llseek, .read_iter = afs_file_read_iter, - .write_iter = afs_file_write, + .write_iter = netfs_file_write_iter, .mmap = afs_file_mmap, .splice_read = afs_file_splice_read, .splice_write = iter_file_splice_write, @@ -53,22 +50,21 @@ const struct inode_operations afs_file_inode_operations = { }; const struct address_space_operations afs_file_aops = { + .direct_IO = noop_direct_IO, .read_folio = netfs_read_folio, .readahead = netfs_readahead, - .dirty_folio = afs_dirty_folio, - .launder_folio = afs_launder_folio, - .release_folio = afs_release_folio, - .invalidate_folio = afs_invalidate_folio, - .write_begin = afs_write_begin, - .write_end = afs_write_end, - .writepages = afs_writepages, + .dirty_folio = netfs_dirty_folio, + .launder_folio = netfs_launder_folio, + .release_folio = netfs_release_folio, + .invalidate_folio = netfs_invalidate_folio, .migrate_folio = filemap_migrate_folio, + .writepages = afs_writepages, }; const struct address_space_operations afs_symlink_aops = { .read_folio = afs_symlink_read_folio, - .release_folio = afs_release_folio, - .invalidate_folio = afs_invalidate_folio, + .release_folio = netfs_release_folio, + .invalidate_folio = netfs_invalidate_folio, .migrate_folio = filemap_migrate_folio, }; @@ -323,11 +319,7 @@ static void 
afs_issue_read(struct netfs_io_subrequest *subreq) fsreq->len = subreq->len - subreq->transferred; fsreq->key = key_get(subreq->rreq->netfs_priv); fsreq->vnode = vnode; - fsreq->iter = &fsreq->def_iter; - - iov_iter_xarray(&fsreq->def_iter, ITER_DEST, - &fsreq->vnode->netfs.inode.i_mapping->i_pages, - fsreq->pos, fsreq->len); + fsreq->iter = &subreq->io_iter; afs_fetch_data(fsreq->vnode, fsreq); afs_put_read(fsreq); @@ -359,22 +351,13 @@ static int afs_symlink_read_folio(struct file *file, struct folio *folio) static int afs_init_request(struct netfs_io_request *rreq, struct file *file) { - rreq->netfs_priv = key_get(afs_file_key(file)); + if (file) + rreq->netfs_priv = key_get(afs_file_key(file)); + rreq->rsize = 256 * 1024; + rreq->wsize = 256 * 1024; return 0; } -static int afs_begin_cache_operation(struct netfs_io_request *rreq) -{ -#ifdef CONFIG_AFS_FSCACHE - struct afs_vnode *vnode = AFS_FS_I(rreq->inode); - - return fscache_begin_read_operation(&rreq->cache_resources, - afs_vnode_cache(vnode)); -#else - return -ENOBUFS; -#endif -} - static int afs_check_write_begin(struct file *file, loff_t pos, unsigned len, struct folio **foliop, void **_fsdata) { @@ -388,128 +371,37 @@ static void afs_free_request(struct netfs_io_request *rreq) key_put(rreq->netfs_priv); } -const struct netfs_request_ops afs_req_ops = { - .init_request = afs_init_request, - .free_request = afs_free_request, - .begin_cache_operation = afs_begin_cache_operation, - .check_write_begin = afs_check_write_begin, - .issue_read = afs_issue_read, -}; - -int afs_write_inode(struct inode *inode, struct writeback_control *wbc) +static void afs_update_i_size(struct inode *inode, loff_t new_i_size) { - fscache_unpin_writeback(wbc, afs_vnode_cache(AFS_FS_I(inode))); - return 0; -} - -/* - * Adjust the dirty region of the page on truncation or full invalidation, - * getting rid of the markers altogether if the region is entirely invalidated. 
- */ -static void afs_invalidate_dirty(struct folio *folio, size_t offset, - size_t length) -{ - struct afs_vnode *vnode = AFS_FS_I(folio_inode(folio)); - unsigned long priv; - unsigned int f, t, end = offset + length; - - priv = (unsigned long)folio_get_private(folio); - - /* we clean up only if the entire page is being invalidated */ - if (offset == 0 && length == folio_size(folio)) - goto full_invalidate; - - /* If the page was dirtied by page_mkwrite(), the PTE stays writable - * and we don't get another notification to tell us to expand it - * again. - */ - if (afs_is_folio_dirty_mmapped(priv)) - return; - - /* We may need to shorten the dirty region */ - f = afs_folio_dirty_from(folio, priv); - t = afs_folio_dirty_to(folio, priv); - - if (t <= offset || f >= end) - return; /* Doesn't overlap */ - - if (f < offset && t > end) - return; /* Splits the dirty region - just absorb it */ - - if (f >= offset && t <= end) - goto undirty; + struct afs_vnode *vnode = AFS_FS_I(inode); + loff_t i_size; - if (f < offset) - t = offset; - else - f = end; - if (f == t) - goto undirty; - - priv = afs_folio_dirty(folio, f, t); - folio_change_private(folio, (void *)priv); - trace_afs_folio_dirty(vnode, tracepoint_string("trunc"), folio); - return; - -undirty: - trace_afs_folio_dirty(vnode, tracepoint_string("undirty"), folio); - folio_clear_dirty_for_io(folio); -full_invalidate: - trace_afs_folio_dirty(vnode, tracepoint_string("inval"), folio); - folio_detach_private(folio); + write_seqlock(&vnode->cb_lock); + i_size = i_size_read(&vnode->netfs.inode); + if (new_i_size > i_size) { + i_size_write(&vnode->netfs.inode, new_i_size); + inode_set_bytes(&vnode->netfs.inode, new_i_size); + } + write_sequnlock(&vnode->cb_lock); + fscache_update_cookie(afs_vnode_cache(vnode), NULL, &new_i_size); } -/* - * invalidate part or all of a page - * - release a page and clean up its private data if offset is 0 (indicating - * the entire page) - */ -static void afs_invalidate_folio(struct folio 
*folio, size_t offset, - size_t length) +static void afs_netfs_invalidate_cache(struct netfs_io_request *wreq) { - _enter("{%lu},%zu,%zu", folio->index, offset, length); - - BUG_ON(!folio_test_locked(folio)); + struct afs_vnode *vnode = AFS_FS_I(wreq->inode); - if (folio_get_private(folio)) - afs_invalidate_dirty(folio, offset, length); - - folio_wait_fscache(folio); - _leave(""); + afs_invalidate_cache(vnode, 0); } -/* - * release a page and clean up its private state if it's not busy - * - return true if the page can now be released, false if not - */ -static bool afs_release_folio(struct folio *folio, gfp_t gfp) -{ - struct afs_vnode *vnode = AFS_FS_I(folio_inode(folio)); - - _enter("{{%llx:%llu}[%lu],%lx},%x", - vnode->fid.vid, vnode->fid.vnode, folio_index(folio), folio->flags, - gfp); - - /* deny if folio is being written to the cache and the caller hasn't - * elected to wait */ -#ifdef CONFIG_AFS_FSCACHE - if (folio_test_fscache(folio)) { - if (current_is_kswapd() || !(gfp & __GFP_FS)) - return false; - folio_wait_fscache(folio); - } - fscache_note_page_release(afs_vnode_cache(vnode)); -#endif - - if (folio_test_private(folio)) { - trace_afs_folio_dirty(vnode, tracepoint_string("rel"), folio); - folio_detach_private(folio); - } - - /* Indicate that the folio can be released */ - _leave(" = T"); - return true; -} +const struct netfs_request_ops afs_req_ops = { + .init_request = afs_init_request, + .free_request = afs_free_request, + .check_write_begin = afs_check_write_begin, + .issue_read = afs_issue_read, + .update_i_size = afs_update_i_size, + .invalidate_cache = afs_netfs_invalidate_cache, + .create_write_requests = afs_create_write_requests, +}; static void afs_add_open_mmap(struct afs_vnode *vnode) { @@ -576,28 +468,39 @@ static vm_fault_t afs_vm_map_pages(struct vm_fault *vmf, pgoff_t start_pgoff, pg static ssize_t afs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) { - struct afs_vnode *vnode = AFS_FS_I(file_inode(iocb->ki_filp)); + struct 
inode *inode = file_inode(iocb->ki_filp); + struct afs_vnode *vnode = AFS_FS_I(inode); struct afs_file *af = iocb->ki_filp->private_data; - int ret; + ssize_t ret; - ret = afs_validate(vnode, af->key); + if (iocb->ki_flags & IOCB_DIRECT) + return netfs_unbuffered_read_iter(iocb, iter); + + ret = netfs_start_io_read(inode); if (ret < 0) return ret; - - return generic_file_read_iter(iocb, iter); + ret = afs_validate(vnode, af->key); + if (ret == 0) + ret = filemap_read(iocb, iter, 0); + netfs_end_io_read(inode); + return ret; } static ssize_t afs_file_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags) { - struct afs_vnode *vnode = AFS_FS_I(file_inode(in)); + struct inode *inode = file_inode(in); + struct afs_vnode *vnode = AFS_FS_I(inode); struct afs_file *af = in->private_data; - int ret; + ssize_t ret; - ret = afs_validate(vnode, af->key); + ret = netfs_start_io_read(inode); if (ret < 0) return ret; - - return filemap_splice_read(in, ppos, pipe, len, flags); + ret = afs_validate(vnode, af->key); + if (ret == 0) + ret = filemap_splice_read(in, ppos, pipe, len, flags); + netfs_end_io_read(inode); + return ret; } diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 4f04f6f33f46b940ffee52b25f527b17d729f1b9..94fc049aff584f43e622d164a13fc30962dd04f1 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -58,7 +58,7 @@ static noinline void dump_vnode(struct afs_vnode *vnode, struct afs_vnode *paren */ static void afs_set_netfs_context(struct afs_vnode *vnode) { - netfs_inode_init(&vnode->netfs, &afs_req_ops); + netfs_inode_init(&vnode->netfs, &afs_req_ops, true); } /* @@ -166,6 +166,7 @@ static void afs_apply_status(struct afs_operation *op, struct inode *inode = &vnode->netfs.inode; struct timespec64 t; umode_t mode; + bool unexpected_jump = false; bool data_changed = false; bool change_size = vp->set_size; @@ -230,6 +231,7 @@ static void afs_apply_status(struct afs_operation *op, } change_size = true; data_changed = true; 
+ unexpected_jump = true; } else if (vnode->status.type == AFS_FTYPE_DIR) { /* Expected directory change is handled elsewhere so * that we can locally edit the directory and save on a @@ -249,8 +251,10 @@ static void afs_apply_status(struct afs_operation *op, * what's on the server. */ vnode->netfs.remote_i_size = status->size; - if (change_size) { + if (change_size || status->size > i_size_read(inode)) { afs_set_i_size(vnode, status->size); + if (unexpected_jump) + vnode->netfs.zero_point = status->size; inode_set_ctime_to_ts(inode, t); inode_set_atime_to_ts(inode, t); } @@ -647,7 +651,7 @@ void afs_evict_inode(struct inode *inode) truncate_inode_pages_final(&inode->i_data); afs_set_cache_aux(vnode, &aux); - fscache_clear_inode_writeback(afs_vnode_cache(vnode), inode, &aux); + netfs_clear_inode_writeback(inode, &aux); clear_inode(inode); while (!list_empty(&vnode->wb_keys)) { @@ -689,17 +693,17 @@ static void afs_setattr_success(struct afs_operation *op) static void afs_setattr_edit_file(struct afs_operation *op) { struct afs_vnode_param *vp = &op->file[0]; - struct inode *inode = &vp->vnode->netfs.inode; + struct afs_vnode *vnode = vp->vnode; if (op->setattr.attr->ia_valid & ATTR_SIZE) { loff_t size = op->setattr.attr->ia_size; loff_t i_size = op->setattr.old_i_size; - if (size < i_size) - truncate_pagecache(inode, size); - if (size != i_size) - fscache_resize_cookie(afs_vnode_cache(vp->vnode), - vp->scb.status.size); + if (size != i_size) { + truncate_setsize(&vnode->netfs.inode, size); + netfs_resize_file(&vnode->netfs, size, true); + fscache_resize_cookie(afs_vnode_cache(vnode), size); + } } } @@ -767,11 +771,11 @@ int afs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, */ if (!(attr->ia_valid & (supported & ~ATTR_SIZE & ~ATTR_MTIME)) && attr->ia_size < i_size && - attr->ia_size > vnode->status.size) { - truncate_pagecache(inode, attr->ia_size); + attr->ia_size > vnode->netfs.remote_i_size) { + truncate_setsize(inode, attr->ia_size); + 
netfs_resize_file(&vnode->netfs, size, false); fscache_resize_cookie(afs_vnode_cache(vnode), attr->ia_size); - i_size_write(inode, attr->ia_size); ret = 0; goto out_unlock; } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 55aa0679d8cec4b349424239852372e39b656395..9c03fcf7ffaa84e9f7604444209bd934b64db466 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -985,62 +985,6 @@ static inline void afs_invalidate_cache(struct afs_vnode *vnode, unsigned int fl i_size_read(&vnode->netfs.inode), flags); } -/* - * We use folio->private to hold the amount of the folio that we've written to, - * splitting the field into two parts. However, we need to represent a range - * 0...FOLIO_SIZE, so we reduce the resolution if the size of the folio - * exceeds what we can encode. - */ -#ifdef CONFIG_64BIT -#define __AFS_FOLIO_PRIV_MASK 0x7fffffffUL -#define __AFS_FOLIO_PRIV_SHIFT 32 -#define __AFS_FOLIO_PRIV_MMAPPED 0x80000000UL -#else -#define __AFS_FOLIO_PRIV_MASK 0x7fffUL -#define __AFS_FOLIO_PRIV_SHIFT 16 -#define __AFS_FOLIO_PRIV_MMAPPED 0x8000UL -#endif - -static inline unsigned int afs_folio_dirty_resolution(struct folio *folio) -{ - int shift = folio_shift(folio) - (__AFS_FOLIO_PRIV_SHIFT - 1); - return (shift > 0) ? 
shift : 0; -} - -static inline size_t afs_folio_dirty_from(struct folio *folio, unsigned long priv) -{ - unsigned long x = priv & __AFS_FOLIO_PRIV_MASK; - - /* The lower bound is inclusive */ - return x << afs_folio_dirty_resolution(folio); -} - -static inline size_t afs_folio_dirty_to(struct folio *folio, unsigned long priv) -{ - unsigned long x = (priv >> __AFS_FOLIO_PRIV_SHIFT) & __AFS_FOLIO_PRIV_MASK; - - /* The upper bound is immediately beyond the region */ - return (x + 1) << afs_folio_dirty_resolution(folio); -} - -static inline unsigned long afs_folio_dirty(struct folio *folio, size_t from, size_t to) -{ - unsigned int res = afs_folio_dirty_resolution(folio); - from >>= res; - to = (to - 1) >> res; - return (to << __AFS_FOLIO_PRIV_SHIFT) | from; -} - -static inline unsigned long afs_folio_dirty_mmapped(unsigned long priv) -{ - return priv | __AFS_FOLIO_PRIV_MMAPPED; -} - -static inline bool afs_is_folio_dirty_mmapped(unsigned long priv) -{ - return priv & __AFS_FOLIO_PRIV_MMAPPED; -} - #include /*****************************************************************************/ @@ -1167,7 +1111,6 @@ extern int afs_release(struct inode *, struct file *); extern int afs_fetch_data(struct afs_vnode *, struct afs_read *); extern struct afs_read *afs_alloc_read(gfp_t); extern void afs_put_read(struct afs_read *); -extern int afs_write_inode(struct inode *, struct writeback_control *); static inline struct afs_read *afs_get_read(struct afs_read *req) { @@ -1658,24 +1601,11 @@ extern int afs_check_volume_status(struct afs_volume *, struct afs_operation *); /* * write.c */ -#ifdef CONFIG_AFS_FSCACHE -bool afs_dirty_folio(struct address_space *, struct folio *); -#else -#define afs_dirty_folio filemap_dirty_folio -#endif -extern int afs_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, - struct page **pagep, void **fsdata); -extern int afs_write_end(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, 
unsigned copied, - struct page *page, void *fsdata); -extern int afs_writepage(struct page *, struct writeback_control *); extern int afs_writepages(struct address_space *, struct writeback_control *); -extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *); extern int afs_fsync(struct file *, loff_t, loff_t, int); extern vm_fault_t afs_page_mkwrite(struct vm_fault *vmf); extern void afs_prune_wb_keys(struct afs_vnode *); -int afs_launder_folio(struct folio *); +void afs_create_write_requests(struct netfs_io_request *wreq, loff_t start, size_t len); /* * xattr.c diff --git a/fs/afs/super.c b/fs/afs/super.c index ae2d66a52add9818101351d63a5cbad334e96e44..f3ba1c3e72f5b8d58e3f1c13eb2935d202cfec68 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -55,7 +55,7 @@ int afs_net_id; static const struct super_operations afs_super_ops = { .statfs = afs_statfs, .alloc_inode = afs_alloc_inode, - .write_inode = afs_write_inode, + .write_inode = netfs_unpin_writeback, .drop_inode = afs_drop_inode, .destroy_inode = afs_destroy_inode, .free_inode = afs_free_inode, diff --git a/fs/afs/write.c b/fs/afs/write.c index 61d34ad2ca7dcd48229f7ff1ae40519511c88aac..74402d95a88434bb58e1e3989d296c11d8f9861d 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -12,309 +12,17 @@ #include #include #include +#include #include "internal.h" -static int afs_writepages_region(struct address_space *mapping, - struct writeback_control *wbc, - loff_t start, loff_t end, loff_t *_next, - bool max_one_loop); - -static void afs_write_to_cache(struct afs_vnode *vnode, loff_t start, size_t len, - loff_t i_size, bool caching); - -#ifdef CONFIG_AFS_FSCACHE -/* - * Mark a page as having been made dirty and thus needing writeback. We also - * need to pin the cache object to write back to. 
- */ -bool afs_dirty_folio(struct address_space *mapping, struct folio *folio) -{ - return fscache_dirty_folio(mapping, folio, - afs_vnode_cache(AFS_FS_I(mapping->host))); -} -static void afs_folio_start_fscache(bool caching, struct folio *folio) -{ - if (caching) - folio_start_fscache(folio); -} -#else -static void afs_folio_start_fscache(bool caching, struct folio *folio) -{ -} -#endif - -/* - * Flush out a conflicting write. This may extend the write to the surrounding - * pages if also dirty and contiguous to the conflicting region.. - */ -static int afs_flush_conflicting_write(struct address_space *mapping, - struct folio *folio) -{ - struct writeback_control wbc = { - .sync_mode = WB_SYNC_ALL, - .nr_to_write = LONG_MAX, - .range_start = folio_pos(folio), - .range_end = LLONG_MAX, - }; - loff_t next; - - return afs_writepages_region(mapping, &wbc, folio_pos(folio), LLONG_MAX, - &next, true); -} - -/* - * prepare to perform part of a write to a page - */ -int afs_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, - struct page **_page, void **fsdata) -{ - struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); - struct folio *folio; - unsigned long priv; - unsigned f, from; - unsigned t, to; - pgoff_t index; - int ret; - - _enter("{%llx:%llu},%llx,%x", - vnode->fid.vid, vnode->fid.vnode, pos, len); - - /* Prefetch area to be written into the cache if we're caching this - * file. We need to do this before we get a lock on the page in case - * there's more than one writer competing for the same cache block. - */ - ret = netfs_write_begin(&vnode->netfs, file, mapping, pos, len, &folio, fsdata); - if (ret < 0) - return ret; - - index = folio_index(folio); - from = pos - index * PAGE_SIZE; - to = from + len; - -try_again: - /* See if this page is already partially written in a way that we can - * merge the new write with. 
- */ - if (folio_test_private(folio)) { - priv = (unsigned long)folio_get_private(folio); - f = afs_folio_dirty_from(folio, priv); - t = afs_folio_dirty_to(folio, priv); - ASSERTCMP(f, <=, t); - - if (folio_test_writeback(folio)) { - trace_afs_folio_dirty(vnode, tracepoint_string("alrdy"), folio); - folio_unlock(folio); - goto wait_for_writeback; - } - /* If the file is being filled locally, allow inter-write - * spaces to be merged into writes. If it's not, only write - * back what the user gives us. - */ - if (!test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags) && - (to < f || from > t)) - goto flush_conflicting_write; - } - - *_page = folio_file_page(folio, pos / PAGE_SIZE); - _leave(" = 0"); - return 0; - - /* The previous write and this write aren't adjacent or overlapping, so - * flush the page out. - */ -flush_conflicting_write: - trace_afs_folio_dirty(vnode, tracepoint_string("confl"), folio); - folio_unlock(folio); - - ret = afs_flush_conflicting_write(mapping, folio); - if (ret < 0) - goto error; - -wait_for_writeback: - ret = folio_wait_writeback_killable(folio); - if (ret < 0) - goto error; - - ret = folio_lock_killable(folio); - if (ret < 0) - goto error; - goto try_again; - -error: - folio_put(folio); - _leave(" = %d", ret); - return ret; -} - -/* - * finalise part of a write to a page - */ -int afs_write_end(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned copied, - struct page *subpage, void *fsdata) -{ - struct folio *folio = page_folio(subpage); - struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); - unsigned long priv; - unsigned int f, from = offset_in_folio(folio, pos); - unsigned int t, to = from + copied; - loff_t i_size, write_end_pos; - - _enter("{%llx:%llu},{%lx}", - vnode->fid.vid, vnode->fid.vnode, folio_index(folio)); - - if (!folio_test_uptodate(folio)) { - if (copied < len) { - copied = 0; - goto out; - } - - folio_mark_uptodate(folio); - } - - if (copied == 0) - goto out; - - write_end_pos = pos + 
copied; - - i_size = i_size_read(&vnode->netfs.inode); - if (write_end_pos > i_size) { - write_seqlock(&vnode->cb_lock); - i_size = i_size_read(&vnode->netfs.inode); - if (write_end_pos > i_size) - afs_set_i_size(vnode, write_end_pos); - write_sequnlock(&vnode->cb_lock); - fscache_update_cookie(afs_vnode_cache(vnode), NULL, &write_end_pos); - } - - if (folio_test_private(folio)) { - priv = (unsigned long)folio_get_private(folio); - f = afs_folio_dirty_from(folio, priv); - t = afs_folio_dirty_to(folio, priv); - if (from < f) - f = from; - if (to > t) - t = to; - priv = afs_folio_dirty(folio, f, t); - folio_change_private(folio, (void *)priv); - trace_afs_folio_dirty(vnode, tracepoint_string("dirty+"), folio); - } else { - priv = afs_folio_dirty(folio, from, to); - folio_attach_private(folio, (void *)priv); - trace_afs_folio_dirty(vnode, tracepoint_string("dirty"), folio); - } - - if (folio_mark_dirty(folio)) - _debug("dirtied %lx", folio_index(folio)); - -out: - folio_unlock(folio); - folio_put(folio); - return copied; -} - -/* - * kill all the pages in the given range - */ -static void afs_kill_pages(struct address_space *mapping, - loff_t start, loff_t len) -{ - struct afs_vnode *vnode = AFS_FS_I(mapping->host); - struct folio *folio; - pgoff_t index = start / PAGE_SIZE; - pgoff_t last = (start + len - 1) / PAGE_SIZE, next; - - _enter("{%llx:%llu},%llx @%llx", - vnode->fid.vid, vnode->fid.vnode, len, start); - - do { - _debug("kill %lx (to %lx)", index, last); - - folio = filemap_get_folio(mapping, index); - if (IS_ERR(folio)) { - next = index + 1; - continue; - } - - next = folio_next_index(folio); - - folio_clear_uptodate(folio); - folio_end_writeback(folio); - folio_lock(folio); - generic_error_remove_folio(mapping, folio); - folio_unlock(folio); - folio_put(folio); - - } while (index = next, index <= last); - - _leave(""); -} - -/* - * Redirty all the pages in a given range. 
- */ -static void afs_redirty_pages(struct writeback_control *wbc, - struct address_space *mapping, - loff_t start, loff_t len) -{ - struct afs_vnode *vnode = AFS_FS_I(mapping->host); - struct folio *folio; - pgoff_t index = start / PAGE_SIZE; - pgoff_t last = (start + len - 1) / PAGE_SIZE, next; - - _enter("{%llx:%llu},%llx @%llx", - vnode->fid.vid, vnode->fid.vnode, len, start); - - do { - _debug("redirty %llx @%llx", len, start); - - folio = filemap_get_folio(mapping, index); - if (IS_ERR(folio)) { - next = index + 1; - continue; - } - - next = index + folio_nr_pages(folio); - folio_redirty_for_writepage(wbc, folio); - folio_end_writeback(folio); - folio_put(folio); - } while (index = next, index <= last); - - _leave(""); -} - /* * completion of write to server */ static void afs_pages_written_back(struct afs_vnode *vnode, loff_t start, unsigned int len) { - struct address_space *mapping = vnode->netfs.inode.i_mapping; - struct folio *folio; - pgoff_t end; - - XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE); - _enter("{%llx:%llu},{%x @%llx}", vnode->fid.vid, vnode->fid.vnode, len, start); - rcu_read_lock(); - - end = (start + len - 1) / PAGE_SIZE; - xas_for_each(&xas, folio, end) { - if (!folio_test_writeback(folio)) { - kdebug("bad %x @%llx page %lx %lx", - len, start, folio_index(folio), end); - ASSERT(folio_test_writeback(folio)); - } - - trace_afs_folio_dirty(vnode, tracepoint_string("clear"), folio); - folio_detach_private(folio); - folio_end_writeback(folio); - } - - rcu_read_unlock(); - afs_prune_wb_keys(vnode); _leave(""); } @@ -451,363 +159,53 @@ try_next_key: return afs_put_operation(op); } -/* - * Extend the region to be written back to include subsequent contiguously - * dirty pages if possible, but don't sleep while doing so. - * - * If this page holds new content, then we can include filler zeros in the - * writeback. 
- */ -static void afs_extend_writeback(struct address_space *mapping, - struct afs_vnode *vnode, - long *_count, - loff_t start, - loff_t max_len, - bool new_content, - bool caching, - unsigned int *_len) +static void afs_upload_to_server(struct netfs_io_subrequest *subreq) { - struct folio_batch fbatch; - struct folio *folio; - unsigned long priv; - unsigned int psize, filler = 0; - unsigned int f, t; - loff_t len = *_len; - pgoff_t index = (start + len) / PAGE_SIZE; - bool stop = true; - unsigned int i; - - XA_STATE(xas, &mapping->i_pages, index); - folio_batch_init(&fbatch); - - do { - /* Firstly, we gather up a batch of contiguous dirty pages - * under the RCU read lock - but we can't clear the dirty flags - * there if any of those pages are mapped. - */ - rcu_read_lock(); - - xas_for_each(&xas, folio, ULONG_MAX) { - stop = true; - if (xas_retry(&xas, folio)) - continue; - if (xa_is_value(folio)) - break; - if (folio_index(folio) != index) - break; - - if (!folio_try_get_rcu(folio)) { - xas_reset(&xas); - continue; - } - - /* Has the page moved or been split? 
*/ - if (unlikely(folio != xas_reload(&xas))) { - folio_put(folio); - break; - } - - if (!folio_trylock(folio)) { - folio_put(folio); - break; - } - if (!folio_test_dirty(folio) || - folio_test_writeback(folio) || - folio_test_fscache(folio)) { - folio_unlock(folio); - folio_put(folio); - break; - } - - psize = folio_size(folio); - priv = (unsigned long)folio_get_private(folio); - f = afs_folio_dirty_from(folio, priv); - t = afs_folio_dirty_to(folio, priv); - if (f != 0 && !new_content) { - folio_unlock(folio); - folio_put(folio); - break; - } - - len += filler + t; - filler = psize - t; - if (len >= max_len || *_count <= 0) - stop = true; - else if (t == psize || new_content) - stop = false; - - index += folio_nr_pages(folio); - if (!folio_batch_add(&fbatch, folio)) - break; - if (stop) - break; - } - - if (!stop) - xas_pause(&xas); - rcu_read_unlock(); - - /* Now, if we obtained any folios, we can shift them to being - * writable and mark them for caching. - */ - if (!folio_batch_count(&fbatch)) - break; - - for (i = 0; i < folio_batch_count(&fbatch); i++) { - folio = fbatch.folios[i]; - trace_afs_folio_dirty(vnode, tracepoint_string("store+"), folio); - - if (!folio_clear_dirty_for_io(folio)) - BUG(); - folio_start_writeback(folio); - afs_folio_start_fscache(caching, folio); - - *_count -= folio_nr_pages(folio); - folio_unlock(folio); - } + struct afs_vnode *vnode = AFS_FS_I(subreq->rreq->inode); + ssize_t ret; - folio_batch_release(&fbatch); - cond_resched(); - } while (!stop); + _enter("%x[%x],%zx", + subreq->rreq->debug_id, subreq->debug_index, subreq->io_iter.count); - *_len = len; + trace_netfs_sreq(subreq, netfs_sreq_trace_submit); + ret = afs_store_data(vnode, &subreq->io_iter, subreq->start, + subreq->rreq->origin == NETFS_LAUNDER_WRITE); + netfs_write_subrequest_terminated(subreq, ret < 0 ? ret : subreq->len, + false); } -/* - * Synchronously write back the locked page and any subsequent non-locked dirty - * pages. 
- */ -static ssize_t afs_write_back_from_locked_folio(struct address_space *mapping, - struct writeback_control *wbc, - struct folio *folio, - loff_t start, loff_t end) +static void afs_upload_to_server_worker(struct work_struct *work) { - struct afs_vnode *vnode = AFS_FS_I(mapping->host); - struct iov_iter iter; - unsigned long priv; - unsigned int offset, to, len, max_len; - loff_t i_size = i_size_read(&vnode->netfs.inode); - bool new_content = test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags); - bool caching = fscache_cookie_enabled(afs_vnode_cache(vnode)); - long count = wbc->nr_to_write; - int ret; - - _enter(",%lx,%llx-%llx", folio_index(folio), start, end); - - folio_start_writeback(folio); - afs_folio_start_fscache(caching, folio); - - count -= folio_nr_pages(folio); - - /* Find all consecutive lockable dirty pages that have contiguous - * written regions, stopping when we find a page that is not - * immediately lockable, is not dirty or is missing, or we reach the - * end of the range. - */ - priv = (unsigned long)folio_get_private(folio); - offset = afs_folio_dirty_from(folio, priv); - to = afs_folio_dirty_to(folio, priv); - trace_afs_folio_dirty(vnode, tracepoint_string("store"), folio); - - len = to - offset; - start += offset; - if (start < i_size) { - /* Trim the write to the EOF; the extra data is ignored. Also - * put an upper limit on the size of a single storedata op. - */ - max_len = 65536 * 4096; - max_len = min_t(unsigned long long, max_len, end - start + 1); - max_len = min_t(unsigned long long, max_len, i_size - start); - - if (len < max_len && - (to == folio_size(folio) || new_content)) - afs_extend_writeback(mapping, vnode, &count, - start, max_len, new_content, - caching, &len); - len = min_t(loff_t, len, max_len); - } - - /* We now have a contiguous set of dirty pages, each with writeback - * set; the first page is still locked at this point, but all the rest - * have been unlocked. 
- */ - folio_unlock(folio); - - if (start < i_size) { - _debug("write back %x @%llx [%llx]", len, start, i_size); - - /* Speculatively write to the cache. We have to fix this up - * later if the store fails. - */ - afs_write_to_cache(vnode, start, len, i_size, caching); - - iov_iter_xarray(&iter, ITER_SOURCE, &mapping->i_pages, start, len); - ret = afs_store_data(vnode, &iter, start, false); - } else { - _debug("write discard %x @%llx [%llx]", len, start, i_size); - - /* The dirty region was entirely beyond the EOF. */ - fscache_clear_page_bits(mapping, start, len, caching); - afs_pages_written_back(vnode, start, len); - ret = 0; - } - - switch (ret) { - case 0: - wbc->nr_to_write = count; - ret = len; - break; + struct netfs_io_subrequest *subreq = + container_of(work, struct netfs_io_subrequest, work); - default: - pr_notice("kAFS: Unexpected error from FS.StoreData %d\n", ret); - fallthrough; - case -EACCES: - case -EPERM: - case -ENOKEY: - case -EKEYEXPIRED: - case -EKEYREJECTED: - case -EKEYREVOKED: - case -ENETRESET: - afs_redirty_pages(wbc, mapping, start, len); - mapping_set_error(mapping, ret); - break; - - case -EDQUOT: - case -ENOSPC: - afs_redirty_pages(wbc, mapping, start, len); - mapping_set_error(mapping, -ENOSPC); - break; - - case -EROFS: - case -EIO: - case -EREMOTEIO: - case -EFBIG: - case -ENOENT: - case -ENOMEDIUM: - case -ENXIO: - trace_afs_file_error(vnode, ret, afs_file_error_writeback_fail); - afs_kill_pages(mapping, start, len); - mapping_set_error(mapping, ret); - break; - } - - _leave(" = %d", ret); - return ret; + afs_upload_to_server(subreq); } /* - * write a region of pages back to the server + * Set up write requests for a writeback slice. We need to add a write request + * for each write we want to make. 
*/ -static int afs_writepages_region(struct address_space *mapping, - struct writeback_control *wbc, - loff_t start, loff_t end, loff_t *_next, - bool max_one_loop) +void afs_create_write_requests(struct netfs_io_request *wreq, loff_t start, size_t len) { - struct folio *folio; - struct folio_batch fbatch; - ssize_t ret; - unsigned int i; - int n, skips = 0; - - _enter("%llx,%llx,", start, end); - folio_batch_init(&fbatch); - - do { - pgoff_t index = start / PAGE_SIZE; - - n = filemap_get_folios_tag(mapping, &index, end / PAGE_SIZE, - PAGECACHE_TAG_DIRTY, &fbatch); - - if (!n) - break; - for (i = 0; i < n; i++) { - folio = fbatch.folios[i]; - start = folio_pos(folio); /* May regress with THPs */ - - _debug("wback %lx", folio_index(folio)); - - /* At this point we hold neither the i_pages lock nor the - * page lock: the page may be truncated or invalidated - * (changing page->mapping to NULL), or even swizzled - * back from swapper_space to tmpfs file mapping - */ -try_again: - if (wbc->sync_mode != WB_SYNC_NONE) { - ret = folio_lock_killable(folio); - if (ret < 0) { - folio_batch_release(&fbatch); - return ret; - } - } else { - if (!folio_trylock(folio)) - continue; - } - - if (folio->mapping != mapping || - !folio_test_dirty(folio)) { - start += folio_size(folio); - folio_unlock(folio); - continue; - } - - if (folio_test_writeback(folio) || - folio_test_fscache(folio)) { - folio_unlock(folio); - if (wbc->sync_mode != WB_SYNC_NONE) { - folio_wait_writeback(folio); -#ifdef CONFIG_AFS_FSCACHE - folio_wait_fscache(folio); -#endif - goto try_again; - } - - start += folio_size(folio); - if (wbc->sync_mode == WB_SYNC_NONE) { - if (skips >= 5 || need_resched()) { - *_next = start; - folio_batch_release(&fbatch); - _leave(" = 0 [%llx]", *_next); - return 0; - } - skips++; - } - continue; - } - - if (!folio_clear_dirty_for_io(folio)) - BUG(); - ret = afs_write_back_from_locked_folio(mapping, wbc, - folio, start, end); - if (ret < 0) { - _leave(" = %zd", ret); - 
folio_batch_release(&fbatch); - return ret; - } - - start += ret; - } + struct netfs_io_subrequest *subreq; - folio_batch_release(&fbatch); - cond_resched(); - } while (wbc->nr_to_write > 0); + _enter("%x,%llx-%llx", wreq->debug_id, start, start + len); - *_next = start; - _leave(" = 0 [%llx]", *_next); - return 0; + subreq = netfs_create_write_request(wreq, NETFS_UPLOAD_TO_SERVER, + start, len, afs_upload_to_server_worker); + if (subreq) + netfs_queue_write_request(subreq); } /* * write some of the pending data back to the server */ -int afs_writepages(struct address_space *mapping, - struct writeback_control *wbc) +int afs_writepages(struct address_space *mapping, struct writeback_control *wbc) { struct afs_vnode *vnode = AFS_FS_I(mapping->host); - loff_t start, next; int ret; - _enter(""); - /* We have to be careful as we can end up racing with setattr() * truncating the pagecache since the caller doesn't take a lock here * to prevent it. @@ -817,68 +215,11 @@ int afs_writepages(struct address_space *mapping, else if (!down_read_trylock(&vnode->validate_lock)) return 0; - if (wbc->range_cyclic) { - start = mapping->writeback_index * PAGE_SIZE; - ret = afs_writepages_region(mapping, wbc, start, LLONG_MAX, - &next, false); - if (ret == 0) { - mapping->writeback_index = next / PAGE_SIZE; - if (start > 0 && wbc->nr_to_write > 0) { - ret = afs_writepages_region(mapping, wbc, 0, - start, &next, false); - if (ret == 0) - mapping->writeback_index = - next / PAGE_SIZE; - } - } - } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) { - ret = afs_writepages_region(mapping, wbc, 0, LLONG_MAX, - &next, false); - if (wbc->nr_to_write > 0 && ret == 0) - mapping->writeback_index = next / PAGE_SIZE; - } else { - ret = afs_writepages_region(mapping, wbc, - wbc->range_start, wbc->range_end, - &next, false); - } - + ret = netfs_writepages(mapping, wbc); up_read(&vnode->validate_lock); - _leave(" = %d", ret); return ret; } -/* - * write to an AFS file - */ -ssize_t 
afs_file_write(struct kiocb *iocb, struct iov_iter *from) -{ - struct afs_vnode *vnode = AFS_FS_I(file_inode(iocb->ki_filp)); - struct afs_file *af = iocb->ki_filp->private_data; - ssize_t result; - size_t count = iov_iter_count(from); - - _enter("{%llx:%llu},{%zu},", - vnode->fid.vid, vnode->fid.vnode, count); - - if (IS_SWAPFILE(&vnode->netfs.inode)) { - printk(KERN_INFO - "AFS: Attempt to write to active swap file!\n"); - return -EBUSY; - } - - if (!count) - return 0; - - result = afs_validate(vnode, af->key); - if (result < 0) - return result; - - result = generic_file_write_iter(iocb, from); - - _leave(" = %zd", result); - return result; -} - /* * flush any dirty pages for this process, and check for write errors. * - the return status from this call provides a reliable indication of @@ -907,59 +248,11 @@ int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync) */ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf) { - struct folio *folio = page_folio(vmf->page); struct file *file = vmf->vma->vm_file; - struct inode *inode = file_inode(file); - struct afs_vnode *vnode = AFS_FS_I(inode); - struct afs_file *af = file->private_data; - unsigned long priv; - vm_fault_t ret = VM_FAULT_RETRY; - - _enter("{{%llx:%llu}},{%lx}", vnode->fid.vid, vnode->fid.vnode, folio_index(folio)); - - afs_validate(vnode, af->key); - sb_start_pagefault(inode->i_sb); - - /* Wait for the page to be written to the cache before we allow it to - * be modified. We then assume the entire page will need writing back. - */ -#ifdef CONFIG_AFS_FSCACHE - if (folio_test_fscache(folio) && - folio_wait_fscache_killable(folio) < 0) - goto out; -#endif - - if (folio_wait_writeback_killable(folio)) - goto out; - - if (folio_lock_killable(folio) < 0) - goto out; - - /* We mustn't change folio->private until writeback is complete as that - * details the portion of the page we need to write back and we might - * need to redirty the page if there's a problem. 
- */ - if (folio_wait_writeback_killable(folio) < 0) { - folio_unlock(folio); - goto out; - } - - priv = afs_folio_dirty(folio, 0, folio_size(folio)); - priv = afs_folio_dirty_mmapped(priv); - if (folio_test_private(folio)) { - folio_change_private(folio, (void *)priv); - trace_afs_folio_dirty(vnode, tracepoint_string("mkwrite+"), folio); - } else { - folio_attach_private(folio, (void *)priv); - trace_afs_folio_dirty(vnode, tracepoint_string("mkwrite"), folio); - } - file_update_time(file); - - ret = VM_FAULT_LOCKED; -out: - sb_end_pagefault(inode->i_sb); - return ret; + if (afs_validate(AFS_FS_I(file_inode(file)), afs_file_key(file)) < 0) + return VM_FAULT_SIGBUS; + return netfs_page_mkwrite(vmf, NULL); } /* @@ -989,64 +282,3 @@ void afs_prune_wb_keys(struct afs_vnode *vnode) afs_put_wb_key(wbk); } } - -/* - * Clean up a page during invalidation. - */ -int afs_launder_folio(struct folio *folio) -{ - struct afs_vnode *vnode = AFS_FS_I(folio_inode(folio)); - struct iov_iter iter; - struct bio_vec bv; - unsigned long priv; - unsigned int f, t; - int ret = 0; - - _enter("{%lx}", folio->index); - - priv = (unsigned long)folio_get_private(folio); - if (folio_clear_dirty_for_io(folio)) { - f = 0; - t = folio_size(folio); - if (folio_test_private(folio)) { - f = afs_folio_dirty_from(folio, priv); - t = afs_folio_dirty_to(folio, priv); - } - - bvec_set_folio(&bv, folio, t - f, f); - iov_iter_bvec(&iter, ITER_SOURCE, &bv, 1, bv.bv_len); - - trace_afs_folio_dirty(vnode, tracepoint_string("launder"), folio); - ret = afs_store_data(vnode, &iter, folio_pos(folio) + f, true); - } - - trace_afs_folio_dirty(vnode, tracepoint_string("laundered"), folio); - folio_detach_private(folio); - folio_wait_fscache(folio); - return ret; -} - -/* - * Deal with the completion of writing the data to the cache. 
- */ -static void afs_write_to_cache_done(void *priv, ssize_t transferred_or_error, - bool was_async) -{ - struct afs_vnode *vnode = priv; - - if (IS_ERR_VALUE(transferred_or_error) && - transferred_or_error != -ENOBUFS) - afs_invalidate_cache(vnode, 0); -} - -/* - * Save the write to the cache also. - */ -static void afs_write_to_cache(struct afs_vnode *vnode, - loff_t start, size_t len, loff_t i_size, - bool caching) -{ - fscache_write_to_cache(afs_vnode_cache(vnode), - vnode->netfs.inode.i_mapping, start, len, i_size, - afs_write_to_cache_done, vnode, caching); -} diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile index 7423a3557c6807a620831475e8608a690fd3315f..1a05cecda7cc5c47695911e7d715aa215f26688d 100644 --- a/fs/bcachefs/Makefile +++ b/fs/bcachefs/Makefile @@ -27,7 +27,6 @@ bcachefs-y := \ checksum.o \ clock.o \ compress.o \ - counters.o \ darray.o \ debug.o \ dirent.o \ @@ -71,6 +70,7 @@ bcachefs-y := \ reflink.o \ replicas.o \ sb-clean.o \ + sb-counters.o \ sb-downgrade.o \ sb-errors.o \ sb-members.o \ diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index a09b9d00226a4e1dd510c0c097ac59e7cb7d3c77..10704f2d3af5302f71a931e13bd0ba5432d46fe2 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -273,7 +273,7 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k, bkey_fsck_err_on(!bch2_bucket_sectors_dirty(*a.v), c, err, alloc_key_dirty_sectors_0, "data_type %s but dirty_sectors==0", - bch2_data_types[a.v->data_type]); + bch2_data_type_str(a.v->data_type)); break; case BCH_DATA_cached: bkey_fsck_err_on(!a.v->cached_sectors || @@ -321,16 +321,12 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c { struct bch_alloc_v4 _a; const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &_a); - unsigned i; prt_newline(out); printbuf_indent_add(out, 2); - prt_printf(out, "gen %u oldest_gen %u data_type %s", - a->gen, a->oldest_gen, - a->data_type < BCH_DATA_NR - ? 
bch2_data_types[a->data_type] - : "(invalid data type)"); + prt_printf(out, "gen %u oldest_gen %u data_type ", a->gen, a->oldest_gen); + bch2_prt_data_type(out, a->data_type); prt_newline(out); prt_printf(out, "journal_seq %llu", a->journal_seq); prt_newline(out); @@ -353,23 +349,6 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c prt_printf(out, "fragmentation %llu", a->fragmentation_lru); prt_newline(out); prt_printf(out, "bp_start %llu", BCH_ALLOC_V4_BACKPOINTERS_START(a)); - prt_newline(out); - - if (BCH_ALLOC_V4_NR_BACKPOINTERS(a)) { - struct bkey_s_c_alloc_v4 a_raw = bkey_s_c_to_alloc_v4(k); - const struct bch_backpointer *bps = alloc_v4_backpointers_c(a_raw.v); - - prt_printf(out, "backpointers: %llu", BCH_ALLOC_V4_NR_BACKPOINTERS(a_raw.v)); - printbuf_indent_add(out, 2); - - for (i = 0; i < BCH_ALLOC_V4_NR_BACKPOINTERS(a_raw.v); i++) { - prt_newline(out); - bch2_backpointer_to_text(out, &bps[i]); - } - - printbuf_indent_sub(out, 2); - } - printbuf_indent_sub(out, 2); } @@ -839,7 +818,7 @@ int bch2_trigger_alloc(struct btree_trans *trans, } } - if (!(flags & BTREE_TRIGGER_TRANSACTIONAL) && (flags & BTREE_TRIGGER_INSERT)) { + if ((flags & BTREE_TRIGGER_ATOMIC) && (flags & BTREE_TRIGGER_INSERT)) { struct bch_alloc_v4 *new_a = bkey_s_to_alloc_v4(new).v; u64 journal_seq = trans->journal_res.seq; u64 bucket_journal_seq = new_a->journal_seq; @@ -1625,13 +1604,36 @@ int bch2_check_alloc_to_lru_refs(struct bch_fs *c) return ret; } +struct discard_buckets_state { + u64 seen; + u64 open; + u64 need_journal_commit; + u64 discarded; + struct bch_dev *ca; + u64 need_journal_commit_this_dev; +}; + +static void discard_buckets_next_dev(struct bch_fs *c, struct discard_buckets_state *s, struct bch_dev *ca) +{ + if (s->ca == ca) + return; + + if (s->ca && s->need_journal_commit_this_dev > + bch2_dev_usage_read(s->ca).d[BCH_DATA_free].buckets) + bch2_journal_flush_async(&c->journal, NULL); + + if (s->ca) + percpu_ref_put(&s->ca->ref); + if 
(ca) + percpu_ref_get(&ca->ref); + s->ca = ca; + s->need_journal_commit_this_dev = 0; +} + static int bch2_discard_one_bucket(struct btree_trans *trans, struct btree_iter *need_discard_iter, struct bpos *discard_pos_done, - u64 *seen, - u64 *open, - u64 *need_journal_commit, - u64 *discarded) + struct discard_buckets_state *s) { struct bch_fs *c = trans->c; struct bpos pos = need_discard_iter->pos; @@ -1643,20 +1645,24 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, int ret = 0; ca = bch_dev_bkey_exists(c, pos.inode); + if (!percpu_ref_tryget(&ca->io_ref)) { bch2_btree_iter_set_pos(need_discard_iter, POS(pos.inode + 1, 0)); return 0; } + discard_buckets_next_dev(c, s, ca); + if (bch2_bucket_is_open_safe(c, pos.inode, pos.offset)) { - (*open)++; + s->open++; goto out; } if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal, c->journal.flushed_seq_ondisk, pos.inode, pos.offset)) { - (*need_journal_commit)++; + s->need_journal_commit++; + s->need_journal_commit_this_dev++; goto out; } @@ -1732,9 +1738,9 @@ write: goto out; count_event(c, bucket_discard); - (*discarded)++; + s->discarded++; out: - (*seen)++; + s->seen++; bch2_trans_iter_exit(trans, &iter); percpu_ref_put(&ca->io_ref); printbuf_exit(&buf); @@ -1744,7 +1750,7 @@ out: static void bch2_do_discards_work(struct work_struct *work) { struct bch_fs *c = container_of(work, struct bch_fs, discard_work); - u64 seen = 0, open = 0, need_journal_commit = 0, discarded = 0; + struct discard_buckets_state s = {}; struct bpos discard_pos_done = POS_MAX; int ret; @@ -1756,19 +1762,14 @@ static void bch2_do_discards_work(struct work_struct *work) ret = bch2_trans_run(c, for_each_btree_key(trans, iter, BTREE_ID_need_discard, POS_MIN, 0, k, - bch2_discard_one_bucket(trans, &iter, &discard_pos_done, - &seen, - &open, - &need_journal_commit, - &discarded))); - - if (need_journal_commit * 2 > seen) - bch2_journal_flush_async(&c->journal, NULL); + bch2_discard_one_bucket(trans, &iter, 
&discard_pos_done, &s))); - bch2_write_ref_put(c, BCH_WRITE_REF_discard); + discard_buckets_next_dev(c, &s, NULL); - trace_discard_buckets(c, seen, open, need_journal_commit, discarded, + trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded, bch2_err_str(ret)); + + bch2_write_ref_put(c, BCH_WRITE_REF_discard); } void bch2_do_discards(struct bch_fs *c) diff --git a/fs/bcachefs/alloc_background_format.h b/fs/bcachefs/alloc_background_format.h new file mode 100644 index 0000000000000000000000000000000000000000..b4ec20be93b86cd719322f893ff8b4cd4379d774 --- /dev/null +++ b/fs/bcachefs/alloc_background_format.h @@ -0,0 +1,92 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_ALLOC_BACKGROUND_FORMAT_H +#define _BCACHEFS_ALLOC_BACKGROUND_FORMAT_H + +struct bch_alloc { + struct bch_val v; + __u8 fields; + __u8 gen; + __u8 data[]; +} __packed __aligned(8); + +#define BCH_ALLOC_FIELDS_V1() \ + x(read_time, 16) \ + x(write_time, 16) \ + x(data_type, 8) \ + x(dirty_sectors, 16) \ + x(cached_sectors, 16) \ + x(oldest_gen, 8) \ + x(stripe, 32) \ + x(stripe_redundancy, 8) + +enum { +#define x(name, _bits) BCH_ALLOC_FIELD_V1_##name, + BCH_ALLOC_FIELDS_V1() +#undef x +}; + +struct bch_alloc_v2 { + struct bch_val v; + __u8 nr_fields; + __u8 gen; + __u8 oldest_gen; + __u8 data_type; + __u8 data[]; +} __packed __aligned(8); + +#define BCH_ALLOC_FIELDS_V2() \ + x(read_time, 64) \ + x(write_time, 64) \ + x(dirty_sectors, 32) \ + x(cached_sectors, 32) \ + x(stripe, 32) \ + x(stripe_redundancy, 8) + +struct bch_alloc_v3 { + struct bch_val v; + __le64 journal_seq; + __le32 flags; + __u8 nr_fields; + __u8 gen; + __u8 oldest_gen; + __u8 data_type; + __u8 data[]; +} __packed __aligned(8); + +LE32_BITMASK(BCH_ALLOC_V3_NEED_DISCARD,struct bch_alloc_v3, flags, 0, 1) +LE32_BITMASK(BCH_ALLOC_V3_NEED_INC_GEN,struct bch_alloc_v3, flags, 1, 2) + +struct bch_alloc_v4 { + struct bch_val v; + __u64 journal_seq; + __u32 flags; + __u8 gen; + __u8 oldest_gen; + __u8 
data_type; + __u8 stripe_redundancy; + __u32 dirty_sectors; + __u32 cached_sectors; + __u64 io_time[2]; + __u32 stripe; + __u32 nr_external_backpointers; + __u64 fragmentation_lru; +} __packed __aligned(8); + +#define BCH_ALLOC_V4_U64s_V0 6 +#define BCH_ALLOC_V4_U64s (sizeof(struct bch_alloc_v4) / sizeof(__u64)) + +BITMASK(BCH_ALLOC_V4_NEED_DISCARD, struct bch_alloc_v4, flags, 0, 1) +BITMASK(BCH_ALLOC_V4_NEED_INC_GEN, struct bch_alloc_v4, flags, 1, 2) +BITMASK(BCH_ALLOC_V4_BACKPOINTERS_START,struct bch_alloc_v4, flags, 2, 8) +BITMASK(BCH_ALLOC_V4_NR_BACKPOINTERS, struct bch_alloc_v4, flags, 8, 14) + +#define KEY_TYPE_BUCKET_GENS_BITS 8 +#define KEY_TYPE_BUCKET_GENS_NR (1U << KEY_TYPE_BUCKET_GENS_BITS) +#define KEY_TYPE_BUCKET_GENS_MASK (KEY_TYPE_BUCKET_GENS_NR - 1) + +struct bch_bucket_gens { + struct bch_val v; + u8 gens[KEY_TYPE_BUCKET_GENS_NR]; +} __packed __aligned(8); + +#endif /* _BCACHEFS_ALLOC_BACKGROUND_FORMAT_H */ diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index b0ff47998a9440912f940dc09e27b34e6341cb9e..633d3223b353f83e83501601024dd262952236c6 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -1525,10 +1525,11 @@ static void bch2_open_bucket_to_text(struct printbuf *out, struct bch_fs *c, str unsigned data_type = ob->data_type; barrier(); /* READ_ONCE() doesn't work on bitfields */ - prt_printf(out, "%zu ref %u %s %u:%llu gen %u allocated %u/%u", + prt_printf(out, "%zu ref %u ", ob - c->open_buckets, - atomic_read(&ob->pin), - data_type < BCH_DATA_NR ? 
bch2_data_types[data_type] : "invalid data type", + atomic_read(&ob->pin)); + bch2_prt_data_type(out, data_type); + prt_printf(out, " %u:%llu gen %u allocated %u/%u", ob->dev, ob->bucket, ob->gen, ca->mi.bucket_size - ob->sectors_free, ca->mi.bucket_size); if (ob->ec) diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index e358a2ffffdea48c80eee18ab299cd7103d72991..b4dc319bcb2bc0a5363e74f6d2096d3b5652599d 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -400,13 +400,24 @@ int bch2_check_btree_backpointers(struct bch_fs *c) return ret; } +static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r) +{ + return bpos_eq(l.k->p, r.k->p) && + bkey_bytes(l.k) == bkey_bytes(r.k) && + !memcmp(l.v, r.v, bkey_val_bytes(l.k)); +} + +struct extents_to_bp_state { + struct bpos bucket_start; + struct bpos bucket_end; + struct bkey_buf last_flushed; +}; + static int check_bp_exists(struct btree_trans *trans, + struct extents_to_bp_state *s, struct bpos bucket, struct bch_backpointer bp, - struct bkey_s_c orig_k, - struct bpos bucket_start, - struct bpos bucket_end, - struct bkey_buf *last_flushed) + struct bkey_s_c orig_k) { struct bch_fs *c = trans->c; struct btree_iter bp_iter = { NULL }; @@ -417,8 +428,8 @@ static int check_bp_exists(struct btree_trans *trans, bch2_bkey_buf_init(&tmp); - if (bpos_lt(bucket, bucket_start) || - bpos_gt(bucket, bucket_end)) + if (bpos_lt(bucket, s->bucket_start) || + bpos_gt(bucket, s->bucket_end)) return 0; if (!bch2_dev_bucket_exists(c, bucket)) @@ -433,11 +444,9 @@ static int check_bp_exists(struct btree_trans *trans, if (bp_k.k->type != KEY_TYPE_backpointer || memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) { - if (!bpos_eq(orig_k.k->p, last_flushed->k->k.p) || - bkey_bytes(orig_k.k) != bkey_bytes(&last_flushed->k->k) || - memcmp(orig_k.v, &last_flushed->k->v, bkey_val_bytes(orig_k.k))) { - bch2_bkey_buf_reassemble(&tmp, c, orig_k); + bch2_bkey_buf_reassemble(&tmp, c, orig_k); 
+ if (!bkey_and_val_eq(orig_k, bkey_i_to_s_c(s->last_flushed.k))) { if (bp.level) { bch2_trans_unlock(trans); bch2_btree_interior_updates_flush(c); @@ -447,7 +456,7 @@ static int check_bp_exists(struct btree_trans *trans, if (ret) goto err; - bch2_bkey_buf_copy(last_flushed, c, tmp.k); + bch2_bkey_buf_copy(&s->last_flushed, c, tmp.k); ret = -BCH_ERR_transaction_restart_write_buffer_flush; goto out; } @@ -475,10 +484,8 @@ missing: } static int check_extent_to_backpointers(struct btree_trans *trans, + struct extents_to_bp_state *s, enum btree_id btree, unsigned level, - struct bpos bucket_start, - struct bpos bucket_end, - struct bkey_buf *last_flushed, struct bkey_s_c k) { struct bch_fs *c = trans->c; @@ -498,9 +505,7 @@ static int check_extent_to_backpointers(struct btree_trans *trans, bch2_extent_ptr_to_bp(c, btree, level, k, p, &bucket_pos, &bp); - ret = check_bp_exists(trans, bucket_pos, bp, k, - bucket_start, bucket_end, - last_flushed); + ret = check_bp_exists(trans, s, bucket_pos, bp, k); if (ret) return ret; } @@ -509,10 +514,8 @@ static int check_extent_to_backpointers(struct btree_trans *trans, } static int check_btree_root_to_backpointers(struct btree_trans *trans, + struct extents_to_bp_state *s, enum btree_id btree_id, - struct bpos bucket_start, - struct bpos bucket_end, - struct bkey_buf *last_flushed, int *level) { struct bch_fs *c = trans->c; @@ -536,9 +539,7 @@ retry: *level = b->c.level; k = bkey_i_to_s_c(&b->key); - ret = check_extent_to_backpointers(trans, btree_id, b->c.level + 1, - bucket_start, bucket_end, - last_flushed, k); + ret = check_extent_to_backpointers(trans, s, btree_id, b->c.level + 1, k); err: bch2_trans_iter_exit(trans, &iter); return ret; @@ -559,7 +560,7 @@ static size_t btree_nodes_fit_in_ram(struct bch_fs *c) si_meminfo(&i); mem_bytes = i.totalram * i.mem_unit; - return div_u64(mem_bytes >> 1, btree_bytes(c)); + return div_u64(mem_bytes >> 1, c->opts.btree_node_size); } static int bch2_get_btree_in_memory_pos(struct 
btree_trans *trans, @@ -610,43 +611,35 @@ static int bch2_get_btree_in_memory_pos(struct btree_trans *trans, } static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans, - struct bpos bucket_start, - struct bpos bucket_end) + struct extents_to_bp_state *s) { struct bch_fs *c = trans->c; - struct btree_iter iter; - enum btree_id btree_id; - struct bkey_s_c k; - struct bkey_buf last_flushed; int ret = 0; - bch2_bkey_buf_init(&last_flushed); - bkey_init(&last_flushed.k->k); - - for (btree_id = 0; btree_id < btree_id_nr_alive(c); btree_id++) { + for (enum btree_id btree_id = 0; + btree_id < btree_id_nr_alive(c); + btree_id++) { int level, depth = btree_type_has_ptrs(btree_id) ? 0 : 1; ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - check_btree_root_to_backpointers(trans, btree_id, - bucket_start, bucket_end, - &last_flushed, &level)); + check_btree_root_to_backpointers(trans, s, btree_id, &level)); if (ret) return ret; while (level >= depth) { + struct btree_iter iter; bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0, level, BTREE_ITER_PREFETCH); while (1) { bch2_trans_begin(trans); - k = bch2_btree_iter_peek(&iter); + + struct bkey_s_c k = bch2_btree_iter_peek(&iter); if (!k.k) break; ret = bkey_err(k) ?: - check_extent_to_backpointers(trans, btree_id, level, - bucket_start, bucket_end, - &last_flushed, k) ?: + check_extent_to_backpointers(trans, s, btree_id, level, k) ?: bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) { @@ -668,7 +661,6 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans, } } - bch2_bkey_buf_exit(&last_flushed, c); return 0; } @@ -731,37 +723,43 @@ static int bch2_get_alloc_in_memory_pos(struct btree_trans *trans, int bch2_check_extents_to_backpointers(struct bch_fs *c) { struct btree_trans *trans = bch2_trans_get(c); - struct bpos start = POS_MIN, end; + struct extents_to_bp_state s = { .bucket_start = 
POS_MIN }; int ret; + bch2_bkey_buf_init(&s.last_flushed); + bkey_init(&s.last_flushed.k->k); + while (1) { - ret = bch2_get_alloc_in_memory_pos(trans, start, &end); + ret = bch2_get_alloc_in_memory_pos(trans, s.bucket_start, &s.bucket_end); if (ret) break; - if (bpos_eq(start, POS_MIN) && !bpos_eq(end, SPOS_MAX)) + if ( bpos_eq(s.bucket_start, POS_MIN) && + !bpos_eq(s.bucket_end, SPOS_MAX)) bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass", __func__, btree_nodes_fit_in_ram(c)); - if (!bpos_eq(start, POS_MIN) || !bpos_eq(end, SPOS_MAX)) { + if (!bpos_eq(s.bucket_start, POS_MIN) || + !bpos_eq(s.bucket_end, SPOS_MAX)) { struct printbuf buf = PRINTBUF; prt_str(&buf, "check_extents_to_backpointers(): "); - bch2_bpos_to_text(&buf, start); + bch2_bpos_to_text(&buf, s.bucket_start); prt_str(&buf, "-"); - bch2_bpos_to_text(&buf, end); + bch2_bpos_to_text(&buf, s.bucket_end); bch_verbose(c, "%s", buf.buf); printbuf_exit(&buf); } - ret = bch2_check_extents_to_backpointers_pass(trans, start, end); - if (ret || bpos_eq(end, SPOS_MAX)) + ret = bch2_check_extents_to_backpointers_pass(trans, &s); + if (ret || bpos_eq(s.bucket_end, SPOS_MAX)) break; - start = bpos_successor(end); + s.bucket_start = bpos_successor(s.bucket_end); } bch2_trans_put(trans); + bch2_bkey_buf_exit(&s.last_flushed, c); bch_err_fn(c, ret); return ret; diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h index 737e2396ade7ec44edf4f18738e286b5da3189bd..327365a9feac4e8fa69575ec6fe6157fd3edb127 100644 --- a/fs/bcachefs/backpointers.h +++ b/fs/bcachefs/backpointers.h @@ -2,6 +2,7 @@ #ifndef _BCACHEFS_BACKPOINTERS_BACKGROUND_H #define _BCACHEFS_BACKPOINTERS_BACKGROUND_H +#include "btree_cache.h" #include "btree_iter.h" #include "btree_update.h" #include "buckets.h" diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index dac383e3718163b6566eb2e6a4ff305fb65da715..b80c6c9efd8cef95b46b5b45b21f639e18373755 100644 --- 
a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -1204,11 +1204,6 @@ static inline unsigned block_sectors(const struct bch_fs *c) return c->opts.block_size >> 9; } -static inline size_t btree_sectors(const struct bch_fs *c) -{ - return c->opts.btree_node_size >> 9; -} - static inline bool btree_id_cached(const struct bch_fs *c, enum btree_id btree) { return c->btree_key_cache_btrees & (1U << btree); diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 0d5ac4184fbcef5a2b7ae618d6bdf81478f09530..0668b682a21ca8e035cae73f73e6774c99eaeb94 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -417,600 +417,12 @@ struct bch_set { struct bch_val v; }; -/* Extents */ - -/* - * In extent bkeys, the value is a list of pointers (bch_extent_ptr), optionally - * preceded by checksum/compression information (bch_extent_crc32 or - * bch_extent_crc64). - * - * One major determining factor in the format of extents is how we handle and - * represent extents that have been partially overwritten and thus trimmed: - * - * If an extent is not checksummed or compressed, when the extent is trimmed we - * don't have to remember the extent we originally allocated and wrote: we can - * merely adjust ptr->offset to point to the start of the data that is currently - * live. The size field in struct bkey records the current (live) size of the - * extent, and is also used to mean "size of region on disk that we point to" in - * this case. - * - * Thus an extent that is not checksummed or compressed will consist only of a - * list of bch_extent_ptrs, with none of the fields in - * bch_extent_crc32/bch_extent_crc64. - * - * When an extent is checksummed or compressed, it's not possible to read only - * the data that is currently live: we have to read the entire extent that was - * originally written, and then return only the part of the extent that is - * currently live. 
- * - * Thus, in addition to the current size of the extent in struct bkey, we need - * to store the size of the originally allocated space - this is the - * compressed_size and uncompressed_size fields in bch_extent_crc32/64. Also, - * when the extent is trimmed, instead of modifying the offset field of the - * pointer, we keep a second smaller offset field - "offset into the original - * extent of the currently live region". - * - * The other major determining factor is replication and data migration: - * - * Each pointer may have its own bch_extent_crc32/64. When doing a replicated - * write, we will initially write all the replicas in the same format, with the - * same checksum type and compression format - however, when copygc runs later (or - * tiering/cache promotion, anything that moves data), it is not in general - * going to rewrite all the pointers at once - one of the replicas may be in a - * bucket on one device that has very little fragmentation while another lives - * in a bucket that has become heavily fragmented, and thus is being rewritten - * sooner than the rest. - * - * Thus it will only move a subset of the pointers (or in the case of - * tiering/cache promotion perhaps add a single pointer without dropping any - * current pointers), and if the extent has been partially overwritten it must - * write only the currently live portion (or copygc would not be able to reduce - * fragmentation!) - which necessitates a different bch_extent_crc format for - * the new pointer. - * - * But in the interests of space efficiency, we don't want to store one - * bch_extent_crc for each pointer if we don't have to. - * - * Thus, a bch_extent consists of bch_extent_crc32s, bch_extent_crc64s, and - * bch_extent_ptrs appended arbitrarily one after the other. 
We determine the - * type of a given entry with a scheme similar to utf8 (except we're encoding a - * type, not a size), encoding the type in the position of the first set bit: - * - * bch_extent_crc32 - 0b1 - * bch_extent_ptr - 0b10 - * bch_extent_crc64 - 0b100 - * - * We do it this way because bch_extent_crc32 is _very_ constrained on bits (and - * bch_extent_crc64 is the least constrained). - * - * Then, each bch_extent_crc32/64 applies to the pointers that follow after it, - * until the next bch_extent_crc32/64. - * - * If there are no bch_extent_crcs preceding a bch_extent_ptr, then that pointer - * is neither checksummed nor compressed. - */ - /* 128 bits, sufficient for cryptographic MACs: */ struct bch_csum { __le64 lo; __le64 hi; } __packed __aligned(8); -#define BCH_EXTENT_ENTRY_TYPES() \ - x(ptr, 0) \ - x(crc32, 1) \ - x(crc64, 2) \ - x(crc128, 3) \ - x(stripe_ptr, 4) \ - x(rebalance, 5) -#define BCH_EXTENT_ENTRY_MAX 6 - -enum bch_extent_entry_type { -#define x(f, n) BCH_EXTENT_ENTRY_##f = n, - BCH_EXTENT_ENTRY_TYPES() -#undef x -}; - -/* Compressed/uncompressed size are stored biased by 1: */ -struct bch_extent_crc32 { -#if defined(__LITTLE_ENDIAN_BITFIELD) - __u32 type:2, - _compressed_size:7, - _uncompressed_size:7, - offset:7, - _unused:1, - csum_type:4, - compression_type:4; - __u32 csum; -#elif defined (__BIG_ENDIAN_BITFIELD) - __u32 csum; - __u32 compression_type:4, - csum_type:4, - _unused:1, - offset:7, - _uncompressed_size:7, - _compressed_size:7, - type:2; -#endif -} __packed __aligned(8); - -#define CRC32_SIZE_MAX (1U << 7) -#define CRC32_NONCE_MAX 0 - -struct bch_extent_crc64 { -#if defined(__LITTLE_ENDIAN_BITFIELD) - __u64 type:3, - _compressed_size:9, - _uncompressed_size:9, - offset:9, - nonce:10, - csum_type:4, - compression_type:4, - csum_hi:16; -#elif defined (__BIG_ENDIAN_BITFIELD) - __u64 csum_hi:16, - compression_type:4, - csum_type:4, - nonce:10, - offset:9, - _uncompressed_size:9, - _compressed_size:9, - type:3; -#endif - __u64 
csum_lo; -} __packed __aligned(8); - -#define CRC64_SIZE_MAX (1U << 9) -#define CRC64_NONCE_MAX ((1U << 10) - 1) - -struct bch_extent_crc128 { -#if defined(__LITTLE_ENDIAN_BITFIELD) - __u64 type:4, - _compressed_size:13, - _uncompressed_size:13, - offset:13, - nonce:13, - csum_type:4, - compression_type:4; -#elif defined (__BIG_ENDIAN_BITFIELD) - __u64 compression_type:4, - csum_type:4, - nonce:13, - offset:13, - _uncompressed_size:13, - _compressed_size:13, - type:4; -#endif - struct bch_csum csum; -} __packed __aligned(8); - -#define CRC128_SIZE_MAX (1U << 13) -#define CRC128_NONCE_MAX ((1U << 13) - 1) - -/* - * @reservation - pointer hasn't been written to, just reserved - */ -struct bch_extent_ptr { -#if defined(__LITTLE_ENDIAN_BITFIELD) - __u64 type:1, - cached:1, - unused:1, - unwritten:1, - offset:44, /* 8 petabytes */ - dev:8, - gen:8; -#elif defined (__BIG_ENDIAN_BITFIELD) - __u64 gen:8, - dev:8, - offset:44, - unwritten:1, - unused:1, - cached:1, - type:1; -#endif -} __packed __aligned(8); - -struct bch_extent_stripe_ptr { -#if defined(__LITTLE_ENDIAN_BITFIELD) - __u64 type:5, - block:8, - redundancy:4, - idx:47; -#elif defined (__BIG_ENDIAN_BITFIELD) - __u64 idx:47, - redundancy:4, - block:8, - type:5; -#endif -}; - -struct bch_extent_rebalance { -#if defined(__LITTLE_ENDIAN_BITFIELD) - __u64 type:6, - unused:34, - compression:8, /* enum bch_compression_opt */ - target:16; -#elif defined (__BIG_ENDIAN_BITFIELD) - __u64 target:16, - compression:8, - unused:34, - type:6; -#endif -}; - -union bch_extent_entry { -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || __BITS_PER_LONG == 64 - unsigned long type; -#elif __BITS_PER_LONG == 32 - struct { - unsigned long pad; - unsigned long type; - }; -#else -#error edit for your odd byteorder. 
-#endif - -#define x(f, n) struct bch_extent_##f f; - BCH_EXTENT_ENTRY_TYPES() -#undef x -}; - -struct bch_btree_ptr { - struct bch_val v; - - __u64 _data[0]; - struct bch_extent_ptr start[]; -} __packed __aligned(8); - -struct bch_btree_ptr_v2 { - struct bch_val v; - - __u64 mem_ptr; - __le64 seq; - __le16 sectors_written; - __le16 flags; - struct bpos min_key; - __u64 _data[0]; - struct bch_extent_ptr start[]; -} __packed __aligned(8); - -LE16_BITMASK(BTREE_PTR_RANGE_UPDATED, struct bch_btree_ptr_v2, flags, 0, 1); - -struct bch_extent { - struct bch_val v; - - __u64 _data[0]; - union bch_extent_entry start[]; -} __packed __aligned(8); - -struct bch_reservation { - struct bch_val v; - - __le32 generation; - __u8 nr_replicas; - __u8 pad[3]; -} __packed __aligned(8); - -/* Maximum size (in u64s) a single pointer could be: */ -#define BKEY_EXTENT_PTR_U64s_MAX\ - ((sizeof(struct bch_extent_crc128) + \ - sizeof(struct bch_extent_ptr)) / sizeof(__u64)) - -/* Maximum possible size of an entire extent value: */ -#define BKEY_EXTENT_VAL_U64s_MAX \ - (1 + BKEY_EXTENT_PTR_U64s_MAX * (BCH_REPLICAS_MAX + 1)) - -/* * Maximum possible size of an entire extent, key + value: */ -#define BKEY_EXTENT_U64s_MAX (BKEY_U64s + BKEY_EXTENT_VAL_U64s_MAX) - -/* Btree pointers don't carry around checksums: */ -#define BKEY_BTREE_PTR_VAL_U64s_MAX \ - ((sizeof(struct bch_btree_ptr_v2) + \ - sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX) / sizeof(__u64)) -#define BKEY_BTREE_PTR_U64s_MAX \ - (BKEY_U64s + BKEY_BTREE_PTR_VAL_U64s_MAX) - -/* Inodes */ - -#define BLOCKDEV_INODE_MAX 4096 - -#define BCACHEFS_ROOT_INO 4096 - -struct bch_inode { - struct bch_val v; - - __le64 bi_hash_seed; - __le32 bi_flags; - __le16 bi_mode; - __u8 fields[]; -} __packed __aligned(8); - -struct bch_inode_v2 { - struct bch_val v; - - __le64 bi_journal_seq; - __le64 bi_hash_seed; - __le64 bi_flags; - __le16 bi_mode; - __u8 fields[]; -} __packed __aligned(8); - -struct bch_inode_v3 { - struct bch_val v; - - __le64 
bi_journal_seq; - __le64 bi_hash_seed; - __le64 bi_flags; - __le64 bi_sectors; - __le64 bi_size; - __le64 bi_version; - __u8 fields[]; -} __packed __aligned(8); - -#define INODEv3_FIELDS_START_INITIAL 6 -#define INODEv3_FIELDS_START_CUR (offsetof(struct bch_inode_v3, fields) / sizeof(__u64)) - -struct bch_inode_generation { - struct bch_val v; - - __le32 bi_generation; - __le32 pad; -} __packed __aligned(8); - -/* - * bi_subvol and bi_parent_subvol are only set for subvolume roots: - */ - -#define BCH_INODE_FIELDS_v2() \ - x(bi_atime, 96) \ - x(bi_ctime, 96) \ - x(bi_mtime, 96) \ - x(bi_otime, 96) \ - x(bi_size, 64) \ - x(bi_sectors, 64) \ - x(bi_uid, 32) \ - x(bi_gid, 32) \ - x(bi_nlink, 32) \ - x(bi_generation, 32) \ - x(bi_dev, 32) \ - x(bi_data_checksum, 8) \ - x(bi_compression, 8) \ - x(bi_project, 32) \ - x(bi_background_compression, 8) \ - x(bi_data_replicas, 8) \ - x(bi_promote_target, 16) \ - x(bi_foreground_target, 16) \ - x(bi_background_target, 16) \ - x(bi_erasure_code, 16) \ - x(bi_fields_set, 16) \ - x(bi_dir, 64) \ - x(bi_dir_offset, 64) \ - x(bi_subvol, 32) \ - x(bi_parent_subvol, 32) - -#define BCH_INODE_FIELDS_v3() \ - x(bi_atime, 96) \ - x(bi_ctime, 96) \ - x(bi_mtime, 96) \ - x(bi_otime, 96) \ - x(bi_uid, 32) \ - x(bi_gid, 32) \ - x(bi_nlink, 32) \ - x(bi_generation, 32) \ - x(bi_dev, 32) \ - x(bi_data_checksum, 8) \ - x(bi_compression, 8) \ - x(bi_project, 32) \ - x(bi_background_compression, 8) \ - x(bi_data_replicas, 8) \ - x(bi_promote_target, 16) \ - x(bi_foreground_target, 16) \ - x(bi_background_target, 16) \ - x(bi_erasure_code, 16) \ - x(bi_fields_set, 16) \ - x(bi_dir, 64) \ - x(bi_dir_offset, 64) \ - x(bi_subvol, 32) \ - x(bi_parent_subvol, 32) \ - x(bi_nocow, 8) - -/* subset of BCH_INODE_FIELDS */ -#define BCH_INODE_OPTS() \ - x(data_checksum, 8) \ - x(compression, 8) \ - x(project, 32) \ - x(background_compression, 8) \ - x(data_replicas, 8) \ - x(promote_target, 16) \ - x(foreground_target, 16) \ - x(background_target, 16) \ - 
x(erasure_code, 16) \ - x(nocow, 8) - -enum inode_opt_id { -#define x(name, ...) \ - Inode_opt_##name, - BCH_INODE_OPTS() -#undef x - Inode_opt_nr, -}; - -#define BCH_INODE_FLAGS() \ - x(sync, 0) \ - x(immutable, 1) \ - x(append, 2) \ - x(nodump, 3) \ - x(noatime, 4) \ - x(i_size_dirty, 5) \ - x(i_sectors_dirty, 6) \ - x(unlinked, 7) \ - x(backptr_untrusted, 8) - -/* bits 20+ reserved for packed fields below: */ - -enum bch_inode_flags { -#define x(t, n) BCH_INODE_##t = 1U << n, - BCH_INODE_FLAGS() -#undef x -}; - -enum __bch_inode_flags { -#define x(t, n) __BCH_INODE_##t = n, - BCH_INODE_FLAGS() -#undef x -}; - -LE32_BITMASK(INODE_STR_HASH, struct bch_inode, bi_flags, 20, 24); -LE32_BITMASK(INODE_NR_FIELDS, struct bch_inode, bi_flags, 24, 31); -LE32_BITMASK(INODE_NEW_VARINT, struct bch_inode, bi_flags, 31, 32); - -LE64_BITMASK(INODEv2_STR_HASH, struct bch_inode_v2, bi_flags, 20, 24); -LE64_BITMASK(INODEv2_NR_FIELDS, struct bch_inode_v2, bi_flags, 24, 31); - -LE64_BITMASK(INODEv3_STR_HASH, struct bch_inode_v3, bi_flags, 20, 24); -LE64_BITMASK(INODEv3_NR_FIELDS, struct bch_inode_v3, bi_flags, 24, 31); - -LE64_BITMASK(INODEv3_FIELDS_START, - struct bch_inode_v3, bi_flags, 31, 36); -LE64_BITMASK(INODEv3_MODE, struct bch_inode_v3, bi_flags, 36, 52); - -/* Dirents */ - -/* - * Dirents (and xattrs) have to implement string lookups; since our b-tree - * doesn't support arbitrary length strings for the key, we instead index by a - * 64 bit hash (currently truncated sha1) of the string, stored in the offset - * field of the key - using linear probing to resolve hash collisions. This also - * provides us with the readdir cookie posix requires. 
- * - * Linear probing requires us to use whiteouts for deletions, in the event of a - * collision: - */ - -struct bch_dirent { - struct bch_val v; - - /* Target inode number: */ - union { - __le64 d_inum; - struct { /* DT_SUBVOL */ - __le32 d_child_subvol; - __le32 d_parent_subvol; - }; - }; - - /* - * Copy of mode bits 12-15 from the target inode - so userspace can get - * the filetype without having to do a stat() - */ - __u8 d_type; - - __u8 d_name[]; -} __packed __aligned(8); - -#define DT_SUBVOL 16 -#define BCH_DT_MAX 17 - -#define BCH_NAME_MAX 512 - -/* Xattrs */ - -#define KEY_TYPE_XATTR_INDEX_USER 0 -#define KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS 1 -#define KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT 2 -#define KEY_TYPE_XATTR_INDEX_TRUSTED 3 -#define KEY_TYPE_XATTR_INDEX_SECURITY 4 - -struct bch_xattr { - struct bch_val v; - __u8 x_type; - __u8 x_name_len; - __le16 x_val_len; - __u8 x_name[]; -} __packed __aligned(8); - -/* Bucket/allocation information: */ - -struct bch_alloc { - struct bch_val v; - __u8 fields; - __u8 gen; - __u8 data[]; -} __packed __aligned(8); - -#define BCH_ALLOC_FIELDS_V1() \ - x(read_time, 16) \ - x(write_time, 16) \ - x(data_type, 8) \ - x(dirty_sectors, 16) \ - x(cached_sectors, 16) \ - x(oldest_gen, 8) \ - x(stripe, 32) \ - x(stripe_redundancy, 8) - -enum { -#define x(name, _bits) BCH_ALLOC_FIELD_V1_##name, - BCH_ALLOC_FIELDS_V1() -#undef x -}; - -struct bch_alloc_v2 { - struct bch_val v; - __u8 nr_fields; - __u8 gen; - __u8 oldest_gen; - __u8 data_type; - __u8 data[]; -} __packed __aligned(8); - -#define BCH_ALLOC_FIELDS_V2() \ - x(read_time, 64) \ - x(write_time, 64) \ - x(dirty_sectors, 32) \ - x(cached_sectors, 32) \ - x(stripe, 32) \ - x(stripe_redundancy, 8) - -struct bch_alloc_v3 { - struct bch_val v; - __le64 journal_seq; - __le32 flags; - __u8 nr_fields; - __u8 gen; - __u8 oldest_gen; - __u8 data_type; - __u8 data[]; -} __packed __aligned(8); - -LE32_BITMASK(BCH_ALLOC_V3_NEED_DISCARD,struct bch_alloc_v3, flags, 0, 1) 
-LE32_BITMASK(BCH_ALLOC_V3_NEED_INC_GEN,struct bch_alloc_v3, flags, 1, 2) - -struct bch_alloc_v4 { - struct bch_val v; - __u64 journal_seq; - __u32 flags; - __u8 gen; - __u8 oldest_gen; - __u8 data_type; - __u8 stripe_redundancy; - __u32 dirty_sectors; - __u32 cached_sectors; - __u64 io_time[2]; - __u32 stripe; - __u32 nr_external_backpointers; - __u64 fragmentation_lru; -} __packed __aligned(8); - -#define BCH_ALLOC_V4_U64s_V0 6 -#define BCH_ALLOC_V4_U64s (sizeof(struct bch_alloc_v4) / sizeof(__u64)) - -BITMASK(BCH_ALLOC_V4_NEED_DISCARD, struct bch_alloc_v4, flags, 0, 1) -BITMASK(BCH_ALLOC_V4_NEED_INC_GEN, struct bch_alloc_v4, flags, 1, 2) -BITMASK(BCH_ALLOC_V4_BACKPOINTERS_START,struct bch_alloc_v4, flags, 2, 8) -BITMASK(BCH_ALLOC_V4_NR_BACKPOINTERS, struct bch_alloc_v4, flags, 8, 14) - -#define BCH_ALLOC_V4_NR_BACKPOINTERS_MAX 40 - struct bch_backpointer { struct bch_val v; __u8 btree_id; @@ -1021,154 +433,6 @@ struct bch_backpointer { struct bpos pos; } __packed __aligned(8); -#define KEY_TYPE_BUCKET_GENS_BITS 8 -#define KEY_TYPE_BUCKET_GENS_NR (1U << KEY_TYPE_BUCKET_GENS_BITS) -#define KEY_TYPE_BUCKET_GENS_MASK (KEY_TYPE_BUCKET_GENS_NR - 1) - -struct bch_bucket_gens { - struct bch_val v; - u8 gens[KEY_TYPE_BUCKET_GENS_NR]; -} __packed __aligned(8); - -/* Quotas: */ - -enum quota_types { - QTYP_USR = 0, - QTYP_GRP = 1, - QTYP_PRJ = 2, - QTYP_NR = 3, -}; - -enum quota_counters { - Q_SPC = 0, - Q_INO = 1, - Q_COUNTERS = 2, -}; - -struct bch_quota_counter { - __le64 hardlimit; - __le64 softlimit; -}; - -struct bch_quota { - struct bch_val v; - struct bch_quota_counter c[Q_COUNTERS]; -} __packed __aligned(8); - -/* Erasure coding */ - -struct bch_stripe { - struct bch_val v; - __le16 sectors; - __u8 algorithm; - __u8 nr_blocks; - __u8 nr_redundant; - - __u8 csum_granularity_bits; - __u8 csum_type; - __u8 pad; - - struct bch_extent_ptr ptrs[]; -} __packed __aligned(8); - -/* Reflink: */ - -struct bch_reflink_p { - struct bch_val v; - __le64 idx; - /* - * A reflink 
pointer might point to an indirect extent which is then - * later split (by copygc or rebalance). If we only pointed to part of - * the original indirect extent, and then one of the fragments is - * outside the range we point to, we'd leak a refcount: so when creating - * reflink pointers, we need to store pad values to remember the full - * range we were taking a reference on. - */ - __le32 front_pad; - __le32 back_pad; -} __packed __aligned(8); - -struct bch_reflink_v { - struct bch_val v; - __le64 refcount; - union bch_extent_entry start[0]; - __u64 _data[]; -} __packed __aligned(8); - -struct bch_indirect_inline_data { - struct bch_val v; - __le64 refcount; - u8 data[]; -}; - -/* Inline data */ - -struct bch_inline_data { - struct bch_val v; - u8 data[]; -}; - -/* Subvolumes: */ - -#define SUBVOL_POS_MIN POS(0, 1) -#define SUBVOL_POS_MAX POS(0, S32_MAX) -#define BCACHEFS_ROOT_SUBVOL 1 - -struct bch_subvolume { - struct bch_val v; - __le32 flags; - __le32 snapshot; - __le64 inode; - /* - * Snapshot subvolumes form a tree, separate from the snapshot nodes - * tree - if this subvolume is a snapshot, this is the ID of the - * subvolume it was created from: - */ - __le32 parent; - __le32 pad; - bch_le128 otime; -}; - -LE32_BITMASK(BCH_SUBVOLUME_RO, struct bch_subvolume, flags, 0, 1) -/* - * We need to know whether a subvolume is a snapshot so we can know whether we - * can delete it (or whether it should just be rm -rf'd) - */ -LE32_BITMASK(BCH_SUBVOLUME_SNAP, struct bch_subvolume, flags, 1, 2) -LE32_BITMASK(BCH_SUBVOLUME_UNLINKED, struct bch_subvolume, flags, 2, 3) - -/* Snapshots */ - -struct bch_snapshot { - struct bch_val v; - __le32 flags; - __le32 parent; - __le32 children[2]; - __le32 subvol; - /* corresponds to a bch_snapshot_tree in BTREE_ID_snapshot_trees */ - __le32 tree; - __le32 depth; - __le32 skip[3]; -}; - -LE32_BITMASK(BCH_SNAPSHOT_DELETED, struct bch_snapshot, flags, 0, 1) - -/* True if a subvolume points to this snapshot node: */ 
-LE32_BITMASK(BCH_SNAPSHOT_SUBVOL, struct bch_snapshot, flags, 1, 2) - -/* - * Snapshot trees: - * - * The snapshot_trees btree gives us persistent indentifier for each tree of - * bch_snapshot nodes, and allow us to record and easily find the root/master - * subvolume that other snapshots were created from: - */ -struct bch_snapshot_tree { - struct bch_val v; - __le32 master_subvol; - __le32 root_snapshot; -}; - /* LRU btree: */ struct bch_lru { @@ -1178,33 +442,6 @@ struct bch_lru { #define LRU_ID_STRIPES (1U << 16) -/* Logged operations btree: */ - -struct bch_logged_op_truncate { - struct bch_val v; - __le32 subvol; - __le32 pad; - __le64 inum; - __le64 new_i_size; -}; - -enum logged_op_finsert_state { - LOGGED_OP_FINSERT_start, - LOGGED_OP_FINSERT_shift_extents, - LOGGED_OP_FINSERT_finish, -}; - -struct bch_logged_op_finsert { - struct bch_val v; - __u8 state; - __u8 pad[3]; - __le32 subvol; - __le64 inum; - __le64 dst_offset; - __le64 src_offset; - __le64 pos; -}; - /* Optional/variable size superblock sections: */ struct bch_sb_field { @@ -1230,6 +467,19 @@ struct bch_sb_field { x(ext, 13) \ x(downgrade, 14) +#include "alloc_background_format.h" +#include "extents_format.h" +#include "reflink_format.h" +#include "ec_format.h" +#include "inode_format.h" +#include "dirent_format.h" +#include "xattr_format.h" +#include "quota_format.h" +#include "logged_ops_format.h" +#include "snapshot_format.h" +#include "subvolume_format.h" +#include "sb-counters_format.h" + enum bch_sb_field_type { #define x(f, nr) BCH_SB_FIELD_##f = nr, BCH_SB_FIELDS() @@ -1465,23 +715,6 @@ struct bch_sb_field_replicas { struct bch_replicas_entry_v1 entries[]; } __packed __aligned(8); -/* BCH_SB_FIELD_quota: */ - -struct bch_sb_quota_counter { - __le32 timelimit; - __le32 warnlimit; -}; - -struct bch_sb_quota_type { - __le64 flags; - struct bch_sb_quota_counter c[Q_COUNTERS]; -}; - -struct bch_sb_field_quota { - struct bch_sb_field field; - struct bch_sb_quota_type q[QTYP_NR]; -} __packed 
__aligned(8); - /* BCH_SB_FIELD_disk_groups: */ #define BCH_SB_LABEL_SIZE 32 @@ -1500,101 +733,6 @@ struct bch_sb_field_disk_groups { struct bch_disk_group entries[]; } __packed __aligned(8); -/* BCH_SB_FIELD_counters */ - -#define BCH_PERSISTENT_COUNTERS() \ - x(io_read, 0) \ - x(io_write, 1) \ - x(io_move, 2) \ - x(bucket_invalidate, 3) \ - x(bucket_discard, 4) \ - x(bucket_alloc, 5) \ - x(bucket_alloc_fail, 6) \ - x(btree_cache_scan, 7) \ - x(btree_cache_reap, 8) \ - x(btree_cache_cannibalize, 9) \ - x(btree_cache_cannibalize_lock, 10) \ - x(btree_cache_cannibalize_lock_fail, 11) \ - x(btree_cache_cannibalize_unlock, 12) \ - x(btree_node_write, 13) \ - x(btree_node_read, 14) \ - x(btree_node_compact, 15) \ - x(btree_node_merge, 16) \ - x(btree_node_split, 17) \ - x(btree_node_rewrite, 18) \ - x(btree_node_alloc, 19) \ - x(btree_node_free, 20) \ - x(btree_node_set_root, 21) \ - x(btree_path_relock_fail, 22) \ - x(btree_path_upgrade_fail, 23) \ - x(btree_reserve_get_fail, 24) \ - x(journal_entry_full, 25) \ - x(journal_full, 26) \ - x(journal_reclaim_finish, 27) \ - x(journal_reclaim_start, 28) \ - x(journal_write, 29) \ - x(read_promote, 30) \ - x(read_bounce, 31) \ - x(read_split, 33) \ - x(read_retry, 32) \ - x(read_reuse_race, 34) \ - x(move_extent_read, 35) \ - x(move_extent_write, 36) \ - x(move_extent_finish, 37) \ - x(move_extent_fail, 38) \ - x(move_extent_start_fail, 39) \ - x(copygc, 40) \ - x(copygc_wait, 41) \ - x(gc_gens_end, 42) \ - x(gc_gens_start, 43) \ - x(trans_blocked_journal_reclaim, 44) \ - x(trans_restart_btree_node_reused, 45) \ - x(trans_restart_btree_node_split, 46) \ - x(trans_restart_fault_inject, 47) \ - x(trans_restart_iter_upgrade, 48) \ - x(trans_restart_journal_preres_get, 49) \ - x(trans_restart_journal_reclaim, 50) \ - x(trans_restart_journal_res_get, 51) \ - x(trans_restart_key_cache_key_realloced, 52) \ - x(trans_restart_key_cache_raced, 53) \ - x(trans_restart_mark_replicas, 54) \ - x(trans_restart_mem_realloced, 55) \ - 
x(trans_restart_memory_allocation_failure, 56) \ - x(trans_restart_relock, 57) \ - x(trans_restart_relock_after_fill, 58) \ - x(trans_restart_relock_key_cache_fill, 59) \ - x(trans_restart_relock_next_node, 60) \ - x(trans_restart_relock_parent_for_fill, 61) \ - x(trans_restart_relock_path, 62) \ - x(trans_restart_relock_path_intent, 63) \ - x(trans_restart_too_many_iters, 64) \ - x(trans_restart_traverse, 65) \ - x(trans_restart_upgrade, 66) \ - x(trans_restart_would_deadlock, 67) \ - x(trans_restart_would_deadlock_write, 68) \ - x(trans_restart_injected, 69) \ - x(trans_restart_key_cache_upgrade, 70) \ - x(trans_traverse_all, 71) \ - x(transaction_commit, 72) \ - x(write_super, 73) \ - x(trans_restart_would_deadlock_recursion_limit, 74) \ - x(trans_restart_write_buffer_flush, 75) \ - x(trans_restart_split_race, 76) \ - x(write_buffer_flush_slowpath, 77) \ - x(write_buffer_flush_sync, 78) - -enum bch_persistent_counters { -#define x(t, n, ...) BCH_COUNTER_##t, - BCH_PERSISTENT_COUNTERS() -#undef x - BCH_COUNTER_NR -}; - -struct bch_sb_field_counters { - struct bch_sb_field field; - __le64 d[]; -}; - /* * On clean shutdown, store btree roots and current journal sequence number in * the superblock: diff --git a/fs/bcachefs/bkey.c b/fs/bcachefs/bkey.c index abdb05507d162c7c06bb89ce96bf67f6484207a7..76e79a15ba08fb23ed9d0560dcd5966fe68ce92a 100644 --- a/fs/bcachefs/bkey.c +++ b/fs/bcachefs/bkey.c @@ -33,7 +33,7 @@ void bch2_bkey_packed_to_binary_text(struct printbuf *out, next_key_bits -= 64; } - bch2_prt_u64_binary(out, v, min(word_bits, nr_key_bits)); + bch2_prt_u64_base2_nbits(out, v, min(word_bits, nr_key_bits)); if (!next_key_bits) break; diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c index 761f5e33b1e69e94ca0aaaa41a9825e496b5840f..5e52684764eb14de4d8433abd5954a829648440b 100644 --- a/fs/bcachefs/bkey_methods.c +++ b/fs/bcachefs/bkey_methods.c @@ -63,8 +63,17 @@ static int key_type_cookie_invalid(struct bch_fs *c, struct bkey_s_c k, return 0; 
} +static void key_type_cookie_to_text(struct printbuf *out, struct bch_fs *c, + struct bkey_s_c k) +{ + struct bkey_s_c_cookie ck = bkey_s_c_to_cookie(k); + + prt_printf(out, "%llu", le64_to_cpu(ck.v->cookie)); +} + #define bch2_bkey_ops_cookie ((struct bkey_ops) { \ .key_invalid = key_type_cookie_invalid, \ + .val_to_text = key_type_cookie_to_text, \ .min_val_size = 8, \ }) diff --git a/fs/bcachefs/bkey_methods.h b/fs/bcachefs/bkey_methods.h index ee82283722b759bbce174b2d902403c0024fe574..03efe8ee565a90672367c2146e3ff44ceb0db526 100644 --- a/fs/bcachefs/bkey_methods.h +++ b/fs/bcachefs/bkey_methods.h @@ -83,9 +83,10 @@ enum btree_update_flags { __BTREE_TRIGGER_NORUN, __BTREE_TRIGGER_TRANSACTIONAL, + __BTREE_TRIGGER_ATOMIC, + __BTREE_TRIGGER_GC, __BTREE_TRIGGER_INSERT, __BTREE_TRIGGER_OVERWRITE, - __BTREE_TRIGGER_GC, __BTREE_TRIGGER_BUCKET_INVALIDATE, }; @@ -107,6 +108,10 @@ enum btree_update_flags { * causing us to go emergency read-only) */ #define BTREE_TRIGGER_TRANSACTIONAL (1U << __BTREE_TRIGGER_TRANSACTIONAL) +#define BTREE_TRIGGER_ATOMIC (1U << __BTREE_TRIGGER_ATOMIC) + +/* We're in gc/fsck: running triggers to recalculate e.g. disk usage */ +#define BTREE_TRIGGER_GC (1U << __BTREE_TRIGGER_GC) /* @new is entering the btree */ #define BTREE_TRIGGER_INSERT (1U << __BTREE_TRIGGER_INSERT) @@ -114,9 +119,6 @@ enum btree_update_flags { /* @old is leaving the btree */ #define BTREE_TRIGGER_OVERWRITE (1U << __BTREE_TRIGGER_OVERWRITE) -/* We're in gc/fsck: running triggers to recalculate e.g. 
disk usage */ -#define BTREE_TRIGGER_GC (1U << __BTREE_TRIGGER_GC) - /* signal from bucket invalidate path to alloc trigger */ #define BTREE_TRIGGER_BUCKET_INVALIDATE (1U << __BTREE_TRIGGER_BUCKET_INVALIDATE) diff --git a/fs/bcachefs/bset.c b/fs/bcachefs/bset.c index 74bf8eb90a4c42cd24dc61024ecb448740e271a7..3fd1085b6c61ee72e7e814cf722306ebdba057c4 100644 --- a/fs/bcachefs/bset.c +++ b/fs/bcachefs/bset.c @@ -720,7 +720,7 @@ static noinline void __build_ro_aux_tree(struct btree *b, struct bset_tree *t) { struct bkey_packed *prev = NULL, *k = btree_bkey_first(b, t); struct bkey_i min_key, max_key; - unsigned j, cacheline = 1; + unsigned cacheline = 1; t->size = min(bkey_to_cacheline(b, t, btree_bkey_last(b, t)), bset_ro_tree_capacity(b, t)); @@ -823,13 +823,12 @@ void bch2_bset_init_first(struct btree *b, struct bset *i) set_btree_bset(b, t, i); } -void bch2_bset_init_next(struct bch_fs *c, struct btree *b, - struct btree_node_entry *bne) +void bch2_bset_init_next(struct btree *b, struct btree_node_entry *bne) { struct bset *i = &bne->keys; struct bset_tree *t; - BUG_ON(bset_byte_offset(b, bne) >= btree_bytes(c)); + BUG_ON(bset_byte_offset(b, bne) >= btree_buf_bytes(b)); BUG_ON((void *) bne < (void *) btree_bkey_last(b, bset_tree_last(b))); BUG_ON(b->nsets >= MAX_BSETS); diff --git a/fs/bcachefs/bset.h b/fs/bcachefs/bset.h index 632c2b8c54609b4be37f11e18868e4c41dcb736b..79c77baaa383868c99660a78a656c73d187f996f 100644 --- a/fs/bcachefs/bset.h +++ b/fs/bcachefs/bset.h @@ -264,8 +264,7 @@ static inline struct bset *bset_next_set(struct btree *b, void bch2_btree_keys_init(struct btree *); void bch2_bset_init_first(struct btree *, struct bset *); -void bch2_bset_init_next(struct bch_fs *, struct btree *, - struct btree_node_entry *); +void bch2_bset_init_next(struct btree *, struct btree_node_entry *); void bch2_bset_build_aux_tree(struct btree *, struct bset_tree *, bool); void bch2_bset_insert(struct btree *, struct btree_node_iter *, diff --git 
a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index 8e2488a4b58d00a45f78a7c64a6c1e83f4b0ff59..d7c81beac14afae7ee44f11f28eb424f1b54a063 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -60,7 +60,7 @@ static void btree_node_data_free(struct bch_fs *c, struct btree *b) clear_btree_node_just_written(b); - kvpfree(b->data, btree_bytes(c)); + kvpfree(b->data, btree_buf_bytes(b)); b->data = NULL; #ifdef __KERNEL__ kvfree(b->aux_data); @@ -94,7 +94,7 @@ static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp) { BUG_ON(b->data || b->aux_data); - b->data = kvpmalloc(btree_bytes(c), gfp); + b->data = kvpmalloc(btree_buf_bytes(b), gfp); if (!b->data) return -BCH_ERR_ENOMEM_btree_node_mem_alloc; #ifdef __KERNEL__ @@ -107,7 +107,7 @@ static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp) b->aux_data = NULL; #endif if (!b->aux_data) { - kvpfree(b->data, btree_bytes(c)); + kvpfree(b->data, btree_buf_bytes(b)); b->data = NULL; return -BCH_ERR_ENOMEM_btree_node_mem_alloc; } @@ -126,7 +126,7 @@ static struct btree *__btree_node_mem_alloc(struct bch_fs *c, gfp_t gfp) bkey_btree_ptr_init(&b->key); INIT_LIST_HEAD(&b->list); INIT_LIST_HEAD(&b->write_blocked); - b->byte_order = ilog2(btree_bytes(c)); + b->byte_order = ilog2(c->opts.btree_node_size); return b; } @@ -408,7 +408,7 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c) if (c->verify_data) list_move(&c->verify_data->list, &bc->live); - kvpfree(c->verify_ondisk, btree_bytes(c)); + kvpfree(c->verify_ondisk, c->opts.btree_node_size); for (i = 0; i < btree_id_nr_alive(c); i++) { struct btree_root *r = bch2_btree_id_root(c, i); @@ -1192,7 +1192,7 @@ void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struc " failed unpacked %zu\n", b->unpack_fn_len, b->nr.live_u64s * sizeof(u64), - btree_bytes(c) - sizeof(struct btree_node), + btree_buf_bytes(b) - sizeof(struct btree_node), b->nr.live_u64s * 100 / btree_max_u64s(c), b->sib_u64s[0], 
b->sib_u64s[1], diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h index 4e1af58820522fc8feec3caf9afc34d12f76c772..6d33885fdbde0d101b4c5785a1bf57bf072fe8de 100644 --- a/fs/bcachefs/btree_cache.h +++ b/fs/bcachefs/btree_cache.h @@ -74,22 +74,27 @@ static inline bool btree_node_hashed(struct btree *b) _iter = 0; _iter < (_tbl)->size; _iter++) \ rht_for_each_entry_rcu((_b), (_pos), _tbl, _iter, hash) -static inline size_t btree_bytes(struct bch_fs *c) +static inline size_t btree_buf_bytes(const struct btree *b) { - return c->opts.btree_node_size; + return 1UL << b->byte_order; } -static inline size_t btree_max_u64s(struct bch_fs *c) +static inline size_t btree_buf_max_u64s(const struct btree *b) { - return (btree_bytes(c) - sizeof(struct btree_node)) / sizeof(u64); + return (btree_buf_bytes(b) - sizeof(struct btree_node)) / sizeof(u64); } -static inline size_t btree_pages(struct bch_fs *c) +static inline size_t btree_max_u64s(const struct bch_fs *c) { - return btree_bytes(c) / PAGE_SIZE; + return (c->opts.btree_node_size - sizeof(struct btree_node)) / sizeof(u64); } -static inline unsigned btree_blocks(struct bch_fs *c) +static inline size_t btree_sectors(const struct bch_fs *c) +{ + return c->opts.btree_node_size >> SECTOR_SHIFT; +} + +static inline unsigned btree_blocks(const struct bch_fs *c) { return btree_sectors(c) >> c->block_bits; } diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 49b4ade758c3623ed35557a02a00afd31b0bec52..1102995643b137c3a8a9fe5f12f0cce95edfafeb 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -597,7 +597,7 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id "bucket %u:%zu data type %s ptr gen %u missing in alloc btree\n" "while marking %s", p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), - bch2_data_types[ptr_data_type(k->k, &p.ptr)], + bch2_data_type_str(ptr_data_type(k->k, &p.ptr)), p.ptr.gen, (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, *k), buf.buf)))) { 
@@ -615,7 +615,7 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id "bucket %u:%zu data type %s ptr gen in the future: %u > %u\n" "while marking %s", p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), - bch2_data_types[ptr_data_type(k->k, &p.ptr)], + bch2_data_type_str(ptr_data_type(k->k, &p.ptr)), p.ptr.gen, g->gen, (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, *k), buf.buf)))) { @@ -637,7 +637,7 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n" "while marking %s", p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), g->gen, - bch2_data_types[ptr_data_type(k->k, &p.ptr)], + bch2_data_type_str(ptr_data_type(k->k, &p.ptr)), p.ptr.gen, (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, *k), buf.buf)))) @@ -649,7 +649,7 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id "bucket %u:%zu data type %s stale dirty ptr: %u < %u\n" "while marking %s", p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), - bch2_data_types[ptr_data_type(k->k, &p.ptr)], + bch2_data_type_str(ptr_data_type(k->k, &p.ptr)), p.ptr.gen, g->gen, (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, *k), buf.buf)))) @@ -664,8 +664,8 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id "bucket %u:%zu different types of data in same bucket: %s, %s\n" "while marking %s", p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), - bch2_data_types[g->data_type], - bch2_data_types[data_type], + bch2_data_type_str(g->data_type), + bch2_data_type_str(data_type), (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) { if (data_type == BCH_DATA_btree) { @@ -1238,11 +1238,11 @@ static int bch2_gc_done(struct bch_fs *c, for (i = 0; i < BCH_DATA_NR; i++) { copy_dev_field(dev_usage_buckets_wrong, - d[i].buckets, "%s buckets", bch2_data_types[i]); + d[i].buckets, "%s buckets", bch2_data_type_str(i)); copy_dev_field(dev_usage_sectors_wrong, - d[i].sectors, "%s 
sectors", bch2_data_types[i]); + d[i].sectors, "%s sectors", bch2_data_type_str(i)); copy_dev_field(dev_usage_fragmented_wrong, - d[i].fragmented, "%s fragmented", bch2_data_types[i]); + d[i].fragmented, "%s fragmented", bch2_data_type_str(i)); } } @@ -1253,19 +1253,19 @@ static int bch2_gc_done(struct bch_fs *c, bch2_acc_percpu_u64s((u64 __percpu *) c->usage_gc, nr); copy_fs_field(fs_usage_hidden_wrong, - hidden, "hidden"); + b.hidden, "hidden"); copy_fs_field(fs_usage_btree_wrong, - btree, "btree"); + b.btree, "btree"); if (!metadata_only) { copy_fs_field(fs_usage_data_wrong, - data, "data"); + b.data, "data"); copy_fs_field(fs_usage_cached_wrong, - cached, "cached"); + b.cached, "cached"); copy_fs_field(fs_usage_reserved_wrong, - reserved, "reserved"); + b.reserved, "reserved"); copy_fs_field(fs_usage_nr_inodes_wrong, - nr_inodes,"nr_inodes"); + b.nr_inodes,"nr_inodes"); for (i = 0; i < BCH_REPLICAS_MAX; i++) copy_fs_field(fs_usage_persistent_reserved_wrong, @@ -1417,8 +1417,8 @@ static int bch2_alloc_write_key(struct btree_trans *trans, ": got %s, should be %s", iter->pos.inode, iter->pos.offset, gc.gen, - bch2_data_types[new.data_type], - bch2_data_types[gc.data_type])) + bch2_data_type_str(new.data_type), + bch2_data_type_str(gc.data_type))) new.data_type = gc.data_type; #define copy_bucket_field(_errtype, _f) \ @@ -1428,7 +1428,7 @@ static int bch2_alloc_write_key(struct btree_trans *trans, ": got %u, should be %u", \ iter->pos.inode, iter->pos.offset, \ gc.gen, \ - bch2_data_types[gc.data_type], \ + bch2_data_type_str(gc.data_type), \ new._f, gc._f)) \ new._f = gc._f; \ diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 33db48e2153fef61f0c733f97278018f419c2b05..aa9b6cbe3226909626411b886731a8bb8648a558 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -112,7 +112,7 @@ static void *btree_bounce_alloc(struct bch_fs *c, size_t size, unsigned flags = memalloc_nofs_save(); void *p; - BUG_ON(size > btree_bytes(c)); + BUG_ON(size > 
c->opts.btree_node_size); *used_mempool = false; p = vpmalloc(size, __GFP_NOWARN|GFP_NOWAIT); @@ -174,8 +174,8 @@ static void bch2_sort_whiteouts(struct bch_fs *c, struct btree *b) ptrs = ptrs_end = ((void *) new_whiteouts + bytes); - for (k = unwritten_whiteouts_start(c, b); - k != unwritten_whiteouts_end(c, b); + for (k = unwritten_whiteouts_start(b); + k != unwritten_whiteouts_end(b); k = bkey_p_next(k)) *--ptrs = k; @@ -192,7 +192,7 @@ static void bch2_sort_whiteouts(struct bch_fs *c, struct btree *b) verify_no_dups(b, new_whiteouts, (void *) ((u64 *) new_whiteouts + b->whiteout_u64s)); - memcpy_u64s(unwritten_whiteouts_start(c, b), + memcpy_u64s(unwritten_whiteouts_start(b), new_whiteouts, b->whiteout_u64s); btree_bounce_free(c, bytes, used_mempool, new_whiteouts); @@ -313,7 +313,7 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b, } bytes = sorting_entire_node - ? btree_bytes(c) + ? btree_buf_bytes(b) : __vstruct_bytes(struct btree_node, u64s); out = btree_bounce_alloc(c, bytes, &used_mempool); @@ -338,7 +338,7 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b, if (sorting_entire_node) { u64s = le16_to_cpu(out->keys.u64s); - BUG_ON(bytes != btree_bytes(c)); + BUG_ON(bytes != btree_buf_bytes(b)); /* * Our temporary buffer is the same size as the btree node's @@ -502,7 +502,7 @@ void bch2_btree_init_next(struct btree_trans *trans, struct btree *b) bne = want_new_bset(c, b); if (bne) - bch2_bset_init_next(c, b, bne); + bch2_bset_init_next(b, bne); bch2_btree_build_aux_trees(b); @@ -1160,7 +1160,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, ptr_written, b->written); } else { for (bne = write_block(b); - bset_byte_offset(b, bne) < btree_bytes(c); + bset_byte_offset(b, bne) < btree_buf_bytes(b); bne = (void *) bne + block_bytes(c)) btree_err_on(bne->keys.seq == b->data->keys.seq && !bch2_journal_seq_is_blacklisted(c, @@ -1172,7 +1172,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, 
"found bset signature after last bset"); } - sorted = btree_bounce_alloc(c, btree_bytes(c), &used_mempool); + sorted = btree_bounce_alloc(c, btree_buf_bytes(b), &used_mempool); sorted->keys.u64s = 0; set_btree_bset(b, b->set, &b->data->keys); @@ -1188,7 +1188,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, BUG_ON(b->nr.live_u64s != u64s); - btree_bounce_free(c, btree_bytes(c), used_mempool, sorted); + btree_bounce_free(c, btree_buf_bytes(b), used_mempool, sorted); if (updated_range) bch2_btree_node_drop_keys_outside_node(b); @@ -1284,7 +1284,7 @@ static void btree_node_read_work(struct work_struct *work) rb->have_ioref = bch2_dev_get_ioref(ca, READ); bio_reset(bio, NULL, REQ_OP_READ|REQ_SYNC|REQ_META); bio->bi_iter.bi_sector = rb->pick.ptr.offset; - bio->bi_iter.bi_size = btree_bytes(c); + bio->bi_iter.bi_size = btree_buf_bytes(b); if (rb->have_ioref) { bio_set_dev(bio, ca->disk_sb.bdev); @@ -1512,7 +1512,7 @@ fsck_err: } if (best >= 0) { - memcpy(b->data, ra->buf[best], btree_bytes(c)); + memcpy(b->data, ra->buf[best], btree_buf_bytes(b)); ret = bch2_btree_node_read_done(c, NULL, b, false, saw_error); } else { ret = -1; @@ -1578,7 +1578,7 @@ static int btree_node_read_all_replicas(struct bch_fs *c, struct btree *b, bool for (i = 0; i < ra->nr; i++) { ra->buf[i] = mempool_alloc(&c->btree_bounce_pool, GFP_NOFS); ra->bio[i] = bio_alloc_bioset(NULL, - buf_pages(ra->buf[i], btree_bytes(c)), + buf_pages(ra->buf[i], btree_buf_bytes(b)), REQ_OP_READ|REQ_SYNC|REQ_META, GFP_NOFS, &c->btree_bio); @@ -1598,7 +1598,7 @@ static int btree_node_read_all_replicas(struct bch_fs *c, struct btree *b, bool rb->pick = pick; rb->bio.bi_iter.bi_sector = pick.ptr.offset; rb->bio.bi_end_io = btree_node_read_all_replicas_endio; - bch2_bio_map(&rb->bio, ra->buf[i], btree_bytes(c)); + bch2_bio_map(&rb->bio, ra->buf[i], btree_buf_bytes(b)); if (rb->have_ioref) { this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_btree], @@ -1665,7 +1665,7 @@ void 
bch2_btree_node_read(struct btree_trans *trans, struct btree *b, ca = bch_dev_bkey_exists(c, pick.ptr.dev); bio = bio_alloc_bioset(NULL, - buf_pages(b->data, btree_bytes(c)), + buf_pages(b->data, btree_buf_bytes(b)), REQ_OP_READ|REQ_SYNC|REQ_META, GFP_NOFS, &c->btree_bio); @@ -1679,7 +1679,7 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b, INIT_WORK(&rb->work, btree_node_read_work); bio->bi_iter.bi_sector = pick.ptr.offset; bio->bi_end_io = btree_node_read_endio; - bch2_bio_map(bio, b->data, btree_bytes(c)); + bch2_bio_map(bio, b->data, btree_buf_bytes(b)); if (rb->have_ioref) { this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_btree], @@ -2074,8 +2074,8 @@ do_write: i->u64s = 0; sort_iter_add(&sort_iter.iter, - unwritten_whiteouts_start(c, b), - unwritten_whiteouts_end(c, b)); + unwritten_whiteouts_start(b), + unwritten_whiteouts_end(b)); SET_BSET_SEPARATE_WHITEOUTS(i, false); b->whiteout_u64s = 0; @@ -2251,7 +2251,7 @@ bool bch2_btree_post_write_cleanup(struct bch_fs *c, struct btree *b) bne = want_new_bset(c, b); if (bne) - bch2_bset_init_next(c, b, bne); + bch2_bset_init_next(b, bne); bch2_btree_build_aux_trees(b); diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index fa298289e01656b989db38dcf19301ae4d880bb7..5467a8635be113102c56bb6f02986209533c35ac 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1337,7 +1337,7 @@ void bch2_path_put(struct btree_trans *trans, btree_path_idx_t path_idx, bool in if (path->should_be_locked && !trans->restarted && - (!dup || !bch2_btree_path_relock_norestart(trans, dup, _THIS_IP_))) + (!dup || !bch2_btree_path_relock_norestart(trans, dup))) return; if (dup) { diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index da2b74fa63fcece86d7d92d18dc340330180c657..24772538e4cc74ada59851bd7847dd5ece5ea122 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -819,6 +819,11 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans, #define 
for_each_btree_key_continue_norestart(_iter, _flags, _k, _ret) \ for_each_btree_key_upto_continue_norestart(_iter, SPOS_MAX, _flags, _k, _ret) +/* + * This should not be used in a fastpath, without first trying _do in + * nonblocking mode - it will cause excessive transaction restarts and + * potentially livelocking: + */ #define drop_locks_do(_trans, _do) \ ({ \ bch2_trans_unlock(_trans); \ diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c index 2d1c95c42f240cc88b31c2728d7a970560e4865a..bed75c93c06904e06f70e3afa92cc507a68b81c9 100644 --- a/fs/bcachefs/btree_locking.c +++ b/fs/bcachefs/btree_locking.c @@ -631,8 +631,7 @@ int bch2_btree_path_relock_intent(struct btree_trans *trans, } __flatten -bool bch2_btree_path_relock_norestart(struct btree_trans *trans, - struct btree_path *path, unsigned long trace_ip) +bool bch2_btree_path_relock_norestart(struct btree_trans *trans, struct btree_path *path) { struct get_locks_fail f; @@ -642,7 +641,7 @@ bool bch2_btree_path_relock_norestart(struct btree_trans *trans, int __bch2_btree_path_relock(struct btree_trans *trans, struct btree_path *path, unsigned long trace_ip) { - if (!bch2_btree_path_relock_norestart(trans, path, trace_ip)) { + if (!bch2_btree_path_relock_norestart(trans, path)) { trace_and_count(trans->c, trans_restart_relock_path, trans, trace_ip, path); return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_path); } @@ -759,12 +758,39 @@ int bch2_trans_relock(struct btree_trans *trans) if (unlikely(trans->restarted)) return -((int) trans->restarted); - trans_for_each_path(trans, path, i) + trans_for_each_path(trans, path, i) { + struct get_locks_fail f; + if (path->should_be_locked && - !bch2_btree_path_relock_norestart(trans, path, _RET_IP_)) { - trace_and_count(trans->c, trans_restart_relock, trans, _RET_IP_, path); + !btree_path_get_locks(trans, path, false, &f)) { + if (trace_trans_restart_relock_enabled()) { + struct printbuf buf = PRINTBUF; + + bch2_bpos_to_text(&buf, 
path->pos); + prt_printf(&buf, " l=%u seq=%u node seq=", + f.l, path->l[f.l].lock_seq); + if (IS_ERR_OR_NULL(f.b)) { + prt_str(&buf, bch2_err_str(PTR_ERR(f.b))); + } else { + prt_printf(&buf, "%u", f.b->c.lock.seq); + + struct six_lock_count c = + bch2_btree_node_lock_counts(trans, NULL, &f.b->c, f.l); + prt_printf(&buf, " self locked %u.%u.%u", c.n[0], c.n[1], c.n[2]); + + c = six_lock_counts(&f.b->c.lock); + prt_printf(&buf, " total locked %u.%u.%u", c.n[0], c.n[1], c.n[2]); + } + + trace_trans_restart_relock(trans, _RET_IP_, buf.buf); + printbuf_exit(&buf); + } + + count_event(trans->c, trans_restart_relock); return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock); } + } + return 0; } @@ -778,7 +804,7 @@ int bch2_trans_relock_notrace(struct btree_trans *trans) trans_for_each_path(trans, path, i) if (path->should_be_locked && - !bch2_btree_path_relock_norestart(trans, path, _RET_IP_)) { + !bch2_btree_path_relock_norestart(trans, path)) { return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock); } return 0; diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h index cc5500a957a1b3084d005abe8b0893146e354bca..4bd72c855da1a4028106b70e10727ad07d578614 100644 --- a/fs/bcachefs/btree_locking.h +++ b/fs/bcachefs/btree_locking.h @@ -312,8 +312,7 @@ void bch2_btree_node_lock_write_nofail(struct btree_trans *, /* relock: */ -bool bch2_btree_path_relock_norestart(struct btree_trans *, - struct btree_path *, unsigned long); +bool bch2_btree_path_relock_norestart(struct btree_trans *, struct btree_path *); int __bch2_btree_path_relock(struct btree_trans *, struct btree_path *, unsigned long); @@ -353,12 +352,6 @@ static inline bool bch2_btree_node_relock_notrace(struct btree_trans *trans, /* upgrade */ - -struct get_locks_fail { - unsigned l; - struct btree *b; -}; - bool bch2_btree_path_upgrade_noupgrade_sibs(struct btree_trans *, struct btree_path *, unsigned, struct get_locks_fail *); diff --git a/fs/bcachefs/btree_trans_commit.c 
b/fs/bcachefs/btree_trans_commit.c index 90eb8065ff2da0224c8627987f58e9314412dcff..30d69a6d133eec77c76c7e64a5de0d896ad6b732 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -139,8 +139,7 @@ bool bch2_btree_bset_insert_key(struct btree_trans *trans, EBUG_ON(bkey_deleted(&insert->k) && bkey_val_u64s(&insert->k)); EBUG_ON(bpos_lt(insert->k.p, b->data->min_key)); EBUG_ON(bpos_gt(insert->k.p, b->data->max_key)); - EBUG_ON(insert->k.u64s > - bch_btree_keys_u64s_remaining(trans->c, b)); + EBUG_ON(insert->k.u64s > bch2_btree_keys_u64s_remaining(b)); EBUG_ON(!b->c.level && !bpos_eq(insert->k.p, path->pos)); k = bch2_btree_node_iter_peek_all(node_iter, b); @@ -160,7 +159,7 @@ bool bch2_btree_bset_insert_key(struct btree_trans *trans, k->type = KEY_TYPE_deleted; if (k->needs_whiteout) - push_whiteout(trans->c, b, insert->k.p); + push_whiteout(b, insert->k.p); k->needs_whiteout = false; if (k >= btree_bset_last(b)->start) { @@ -348,9 +347,7 @@ static noinline void journal_transaction_name(struct btree_trans *trans) static inline int btree_key_can_insert(struct btree_trans *trans, struct btree *b, unsigned u64s) { - struct bch_fs *c = trans->c; - - if (!bch2_btree_node_insert_fits(c, b, u64s)) + if (!bch2_btree_node_insert_fits(b, u64s)) return -BCH_ERR_btree_insert_btree_node_full; return 0; @@ -418,7 +415,7 @@ static int btree_key_can_insert_cached(struct btree_trans *trans, unsigned flags return 0; new_u64s = roundup_pow_of_two(u64s); - new_k = krealloc(ck->k, new_u64s * sizeof(u64), GFP_NOWAIT); + new_k = krealloc(ck->k, new_u64s * sizeof(u64), GFP_NOWAIT|__GFP_NOWARN); if (unlikely(!new_k)) return btree_key_can_insert_cached_slowpath(trans, flags, path, new_u64s); @@ -448,9 +445,6 @@ static int run_one_mem_trigger(struct btree_trans *trans, if (unlikely(flags & BTREE_TRIGGER_NORUN)) return 0; - if (!btree_node_type_needs_gc(__btree_node_type(i->level, i->btree_id))) - return 0; - if (old_ops->trigger == new_ops->trigger) { ret = 
bch2_key_trigger(trans, i->btree_id, i->level, old, bkey_i_to_s(new), @@ -586,9 +580,6 @@ static int bch2_trans_commit_run_triggers(struct btree_trans *trans) static noinline int bch2_trans_commit_run_gc_triggers(struct btree_trans *trans) { - struct bch_fs *c = trans->c; - int ret = 0; - trans_for_each_update(trans, i) { /* * XXX: synchronization of cached update triggers with gc @@ -596,14 +587,15 @@ static noinline int bch2_trans_commit_run_gc_triggers(struct btree_trans *trans) */ BUG_ON(i->cached || i->level); - if (gc_visited(c, gc_pos_btree_node(insert_l(trans, i)->b))) { - ret = run_one_mem_trigger(trans, i, i->flags|BTREE_TRIGGER_GC); + if (btree_node_type_needs_gc(__btree_node_type(i->level, i->btree_id)) && + gc_visited(trans->c, gc_pos_btree_node(insert_l(trans, i)->b))) { + int ret = run_one_mem_trigger(trans, i, i->flags|BTREE_TRIGGER_GC); if (ret) - break; + return ret; } } - return ret; + return 0; } static inline int @@ -680,6 +672,9 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, bch2_trans_fs_usage_apply(trans, trans->fs_usage_deltas)) return -BCH_ERR_btree_insert_need_mark_replicas; + /* XXX: we only want to run this if deltas are nonzero */ + bch2_trans_account_disk_usage_change(trans); + h = trans->hooks; while (h) { ret = h->fn(trans, h); @@ -689,8 +684,8 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, } trans_for_each_update(trans, i) - if (BTREE_NODE_TYPE_HAS_MEM_TRIGGERS & (1U << i->bkey_type)) { - ret = run_one_mem_trigger(trans, i, i->flags); + if (BTREE_NODE_TYPE_HAS_ATOMIC_TRIGGERS & (1U << i->bkey_type)) { + ret = run_one_mem_trigger(trans, i, BTREE_TRIGGER_ATOMIC|i->flags); if (ret) goto fatal_err; } @@ -994,6 +989,8 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) !trans->journal_entries_u64s) goto out_reset; + memset(&trans->fs_usage_delta, 0, sizeof(trans->fs_usage_delta)); + ret = bch2_trans_commit_run_triggers(trans); if (ret) goto out_reset; diff 
--git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index d530307046f4cf93bdb4c4063409a9fff5e705c4..4a5a64499eb76698743ae7f20b4e47eaca09b868 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -430,6 +430,9 @@ struct btree_trans { struct journal_res journal_res; u64 *journal_seq; struct disk_reservation *disk_res; + + struct bch_fs_usage_base fs_usage_delta; + unsigned journal_u64s; unsigned extra_disk_res; /* XXX kill */ struct replicas_delta_list *fs_usage_deltas; @@ -653,7 +656,7 @@ const char *bch2_btree_node_type_str(enum btree_node_type); BIT_ULL(BKEY_TYPE_reflink)| \ BIT_ULL(BKEY_TYPE_btree)) -#define BTREE_NODE_TYPE_HAS_MEM_TRIGGERS \ +#define BTREE_NODE_TYPE_HAS_ATOMIC_TRIGGERS \ (BIT_ULL(BKEY_TYPE_alloc)| \ BIT_ULL(BKEY_TYPE_inodes)| \ BIT_ULL(BKEY_TYPE_stripes)| \ @@ -661,7 +664,7 @@ const char *bch2_btree_node_type_str(enum btree_node_type); #define BTREE_NODE_TYPE_HAS_TRIGGERS \ (BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS| \ - BTREE_NODE_TYPE_HAS_MEM_TRIGGERS) + BTREE_NODE_TYPE_HAS_ATOMIC_TRIGGERS) static inline bool btree_node_type_needs_gc(enum btree_node_type type) { @@ -738,4 +741,9 @@ enum btree_node_sibling { btree_next_sib, }; +struct get_locks_fail { + unsigned l; + struct btree *b; +}; + #endif /* _BCACHEFS_BTREE_TYPES_H */ diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 44f9dfa28a09d89984150b19d3831077a18485f1..17a5938aa71a6b43b45c12383e4690df146ee2a3 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -159,7 +159,7 @@ static bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *b, { size_t u64s = btree_node_u64s_with_format(nr, &b->format, new_f); - return __vstruct_bytes(struct btree_node, u64s) < btree_bytes(c); + return __vstruct_bytes(struct btree_node, u64s) < btree_buf_bytes(b); } /* Btree node freeing/allocation: */ @@ -1097,7 +1097,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, * 
Always check for space for two keys, even if we won't have to * split at prior level - it might have been a merge instead: */ - if (bch2_btree_node_insert_fits(c, path->l[update_level].b, + if (bch2_btree_node_insert_fits(path->l[update_level].b, BKEY_BTREE_PTR_U64s_MAX * 2)) break; @@ -1401,7 +1401,7 @@ static void __btree_split_node(struct btree_update *as, unsigned u64s = nr_keys[i].nr_keys * n[i]->data->format.key_u64s + nr_keys[i].val_u64s; - if (__vstruct_bytes(struct btree_node, u64s) > btree_bytes(as->c)) + if (__vstruct_bytes(struct btree_node, u64s) > btree_buf_bytes(b)) n[i]->data->format = b->format; btree_node_set_format(n[i], n[i]->data->format); @@ -1703,7 +1703,7 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t bch2_btree_node_prep_for_write(trans, path, b); - if (!bch2_btree_node_insert_fits(c, b, bch2_keylist_u64s(keys))) { + if (!bch2_btree_node_insert_fits(b, bch2_keylist_u64s(keys))) { bch2_btree_node_unlock_write(trans, path, b); goto split; } diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h index adfc62083844cf3b93d16d25d8269564f5b022a3..c593c925d1e3b03cfae5b4e7fdf0f7bc4b99df5c 100644 --- a/fs/bcachefs/btree_update_interior.h +++ b/fs/bcachefs/btree_update_interior.h @@ -184,21 +184,19 @@ static inline void btree_node_reset_sib_u64s(struct btree *b) b->sib_u64s[1] = b->nr.live_u64s; } -static inline void *btree_data_end(struct bch_fs *c, struct btree *b) +static inline void *btree_data_end(struct btree *b) { - return (void *) b->data + btree_bytes(c); + return (void *) b->data + btree_buf_bytes(b); } -static inline struct bkey_packed *unwritten_whiteouts_start(struct bch_fs *c, - struct btree *b) +static inline struct bkey_packed *unwritten_whiteouts_start(struct btree *b) { - return (void *) ((u64 *) btree_data_end(c, b) - b->whiteout_u64s); + return (void *) ((u64 *) btree_data_end(b) - b->whiteout_u64s); } -static inline struct bkey_packed 
*unwritten_whiteouts_end(struct bch_fs *c, - struct btree *b) +static inline struct bkey_packed *unwritten_whiteouts_end(struct btree *b) { - return btree_data_end(c, b); + return btree_data_end(b); } static inline void *write_block(struct btree *b) @@ -221,13 +219,11 @@ static inline bool bkey_written(struct btree *b, struct bkey_packed *k) return __btree_addr_written(b, k); } -static inline ssize_t __bch_btree_u64s_remaining(struct bch_fs *c, - struct btree *b, - void *end) +static inline ssize_t __bch2_btree_u64s_remaining(struct btree *b, void *end) { ssize_t used = bset_byte_offset(b, end) / sizeof(u64) + b->whiteout_u64s; - ssize_t total = c->opts.btree_node_size >> 3; + ssize_t total = btree_buf_bytes(b) >> 3; /* Always leave one extra u64 for bch2_varint_decode: */ used++; @@ -235,10 +231,9 @@ static inline ssize_t __bch_btree_u64s_remaining(struct bch_fs *c, return total - used; } -static inline size_t bch_btree_keys_u64s_remaining(struct bch_fs *c, - struct btree *b) +static inline size_t bch2_btree_keys_u64s_remaining(struct btree *b) { - ssize_t remaining = __bch_btree_u64s_remaining(c, b, + ssize_t remaining = __bch2_btree_u64s_remaining(b, btree_bkey_last(b, bset_tree_last(b))); BUG_ON(remaining < 0); @@ -260,14 +255,13 @@ static inline unsigned btree_write_set_buffer(struct btree *b) return 8 << BTREE_WRITE_SET_U64s_BITS; } -static inline struct btree_node_entry *want_new_bset(struct bch_fs *c, - struct btree *b) +static inline struct btree_node_entry *want_new_bset(struct bch_fs *c, struct btree *b) { struct bset_tree *t = bset_tree_last(b); struct btree_node_entry *bne = max(write_block(b), (void *) btree_bkey_last(b, bset_tree_last(b))); ssize_t remaining_space = - __bch_btree_u64s_remaining(c, b, bne->keys.start); + __bch2_btree_u64s_remaining(b, bne->keys.start); if (unlikely(bset_written(b, bset(b, t)))) { if (remaining_space > (ssize_t) (block_bytes(c) >> 3)) @@ -281,12 +275,11 @@ static inline struct btree_node_entry *want_new_bset(struct 
bch_fs *c, return NULL; } -static inline void push_whiteout(struct bch_fs *c, struct btree *b, - struct bpos pos) +static inline void push_whiteout(struct btree *b, struct bpos pos) { struct bkey_packed k; - BUG_ON(bch_btree_keys_u64s_remaining(c, b) < BKEY_U64s); + BUG_ON(bch2_btree_keys_u64s_remaining(b) < BKEY_U64s); EBUG_ON(btree_node_just_written(b)); if (!bkey_pack_pos(&k, pos, b)) { @@ -299,20 +292,19 @@ static inline void push_whiteout(struct bch_fs *c, struct btree *b, k.needs_whiteout = true; b->whiteout_u64s += k.u64s; - bkey_p_copy(unwritten_whiteouts_start(c, b), &k); + bkey_p_copy(unwritten_whiteouts_start(b), &k); } /* * write lock must be held on @b (else the dirty bset that we were going to * insert into could be written out from under us) */ -static inline bool bch2_btree_node_insert_fits(struct bch_fs *c, - struct btree *b, unsigned u64s) +static inline bool bch2_btree_node_insert_fits(struct btree *b, unsigned u64s) { if (unlikely(btree_node_need_rewrite(b))) return false; - return u64s <= bch_btree_keys_u64s_remaining(c, b); + return u64s <= bch2_btree_keys_u64s_remaining(b); } void bch2_btree_updates_to_text(struct printbuf *, struct bch_fs *); diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c index 5c1169c78dafec7bf238854a74b37120f1c835cd..ac7844861966368cdce41efd9e27c898fe8ad6e7 100644 --- a/fs/bcachefs/btree_write_buffer.c +++ b/fs/bcachefs/btree_write_buffer.c @@ -125,13 +125,12 @@ static inline int wb_flush_one(struct btree_trans *trans, struct btree_iter *ite struct btree_write_buffered_key *wb, bool *write_locked, size_t *fast) { - struct bch_fs *c = trans->c; struct btree_path *path; int ret; EBUG_ON(!wb->journal_seq); - EBUG_ON(!c->btree_write_buffer.flushing.pin.seq); - EBUG_ON(c->btree_write_buffer.flushing.pin.seq > wb->journal_seq); + EBUG_ON(!trans->c->btree_write_buffer.flushing.pin.seq); + EBUG_ON(trans->c->btree_write_buffer.flushing.pin.seq > wb->journal_seq); ret = 
bch2_btree_iter_traverse(iter); if (ret) @@ -155,7 +154,7 @@ static inline int wb_flush_one(struct btree_trans *trans, struct btree_iter *ite *write_locked = true; } - if (unlikely(!bch2_btree_node_insert_fits(c, path->l[0].b, wb->k.k.u64s))) { + if (unlikely(!bch2_btree_node_insert_fits(path->l[0].b, wb->k.k.u64s))) { *write_locked = false; return wb_flush_one_slowpath(trans, iter, wb); } diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index d83ea0e53df3f36f8476cd096ca4cc6948145cc3..54f7826ac49874d46b08330678ea0b2565ecc491 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -25,7 +25,7 @@ #include -static inline void fs_usage_data_type_to_base(struct bch_fs_usage *fs_usage, +static inline void fs_usage_data_type_to_base(struct bch_fs_usage_base *fs_usage, enum bch_data_type data_type, s64 sectors) { @@ -54,20 +54,20 @@ void bch2_fs_usage_initialize(struct bch_fs *c) bch2_fs_usage_acc_to_base(c, i); for (unsigned i = 0; i < BCH_REPLICAS_MAX; i++) - usage->reserved += usage->persistent_reserved[i]; + usage->b.reserved += usage->persistent_reserved[i]; for (unsigned i = 0; i < c->replicas.nr; i++) { struct bch_replicas_entry_v1 *e = cpu_replicas_entry(&c->replicas, i); - fs_usage_data_type_to_base(usage, e->data_type, usage->replicas[i]); + fs_usage_data_type_to_base(&usage->b, e->data_type, usage->replicas[i]); } for_each_member_device(c, ca) { struct bch_dev_usage dev = bch2_dev_usage_read(ca); - usage->hidden += (dev.d[BCH_DATA_sb].buckets + - dev.d[BCH_DATA_journal].buckets) * + usage->b.hidden += (dev.d[BCH_DATA_sb].buckets + + dev.d[BCH_DATA_journal].buckets) * ca->mi.bucket_size; } @@ -188,15 +188,15 @@ void bch2_fs_usage_to_text(struct printbuf *out, prt_printf(out, "capacity:\t\t\t%llu\n", c->capacity); prt_printf(out, "hidden:\t\t\t\t%llu\n", - fs_usage->u.hidden); + fs_usage->u.b.hidden); prt_printf(out, "data:\t\t\t\t%llu\n", - fs_usage->u.data); + fs_usage->u.b.data); prt_printf(out, "cached:\t\t\t\t%llu\n", - 
fs_usage->u.cached); + fs_usage->u.b.cached); prt_printf(out, "reserved:\t\t\t%llu\n", - fs_usage->u.reserved); + fs_usage->u.b.reserved); prt_printf(out, "nr_inodes:\t\t\t%llu\n", - fs_usage->u.nr_inodes); + fs_usage->u.b.nr_inodes); prt_printf(out, "online reserved:\t\t%llu\n", fs_usage->online_reserved); @@ -225,10 +225,10 @@ static u64 reserve_factor(u64 r) u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage_online *fs_usage) { - return min(fs_usage->u.hidden + - fs_usage->u.btree + - fs_usage->u.data + - reserve_factor(fs_usage->u.reserved + + return min(fs_usage->u.b.hidden + + fs_usage->u.b.btree + + fs_usage->u.b.data + + reserve_factor(fs_usage->u.b.reserved + fs_usage->online_reserved), c->capacity); } @@ -240,17 +240,17 @@ __bch2_fs_usage_read_short(struct bch_fs *c) u64 data, reserved; ret.capacity = c->capacity - - bch2_fs_usage_read_one(c, &c->usage_base->hidden); + bch2_fs_usage_read_one(c, &c->usage_base->b.hidden); - data = bch2_fs_usage_read_one(c, &c->usage_base->data) + - bch2_fs_usage_read_one(c, &c->usage_base->btree); - reserved = bch2_fs_usage_read_one(c, &c->usage_base->reserved) + + data = bch2_fs_usage_read_one(c, &c->usage_base->b.data) + + bch2_fs_usage_read_one(c, &c->usage_base->b.btree); + reserved = bch2_fs_usage_read_one(c, &c->usage_base->b.reserved) + percpu_u64_get(c->online_reserved); ret.used = min(ret.capacity, data + reserve_factor(reserved)); ret.free = ret.capacity - ret.used; - ret.nr_inodes = bch2_fs_usage_read_one(c, &c->usage_base->nr_inodes); + ret.nr_inodes = bch2_fs_usage_read_one(c, &c->usage_base->b.nr_inodes); return ret; } @@ -284,7 +284,7 @@ void bch2_dev_usage_to_text(struct printbuf *out, struct bch_dev_usage *usage) prt_newline(out); for (unsigned i = 0; i < BCH_DATA_NR; i++) { - prt_str(out, bch2_data_types[i]); + bch2_prt_data_type(out, i); prt_tab(out); prt_u64(out, usage->d[i].buckets); prt_tab_rjust(out); @@ -308,9 +308,9 @@ void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca, 
fs_usage = fs_usage_ptr(c, journal_seq, gc); if (data_type_is_hidden(old->data_type)) - fs_usage->hidden -= ca->mi.bucket_size; + fs_usage->b.hidden -= ca->mi.bucket_size; if (data_type_is_hidden(new->data_type)) - fs_usage->hidden += ca->mi.bucket_size; + fs_usage->b.hidden += ca->mi.bucket_size; u = dev_usage_ptr(ca, journal_seq, gc); @@ -359,7 +359,7 @@ static inline int __update_replicas(struct bch_fs *c, if (idx < 0) return -1; - fs_usage_data_type_to_base(fs_usage, r->data_type, sectors); + fs_usage_data_type_to_base(&fs_usage->b, r->data_type, sectors); fs_usage->replicas[idx] += sectors; return 0; } @@ -394,7 +394,7 @@ int bch2_update_replicas(struct bch_fs *c, struct bkey_s_c k, preempt_disable(); fs_usage = fs_usage_ptr(c, journal_seq, gc); - fs_usage_data_type_to_base(fs_usage, r->data_type, sectors); + fs_usage_data_type_to_base(&fs_usage->b, r->data_type, sectors); fs_usage->replicas[idx] += sectors; preempt_enable(); err: @@ -523,8 +523,8 @@ int bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca, if (bch2_fs_inconsistent_on(g->data_type && g->data_type != data_type, c, "different types of data in same bucket: %s, %s", - bch2_data_types[g->data_type], - bch2_data_types[data_type])) { + bch2_data_type_str(g->data_type), + bch2_data_type_str(data_type))) { ret = -EIO; goto err; } @@ -532,7 +532,7 @@ int bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca, if (bch2_fs_inconsistent_on((u64) g->dirty_sectors + sectors > ca->mi.bucket_size, c, "bucket %u:%zu gen %u data type %s sector count overflow: %u + %u > bucket size", ca->dev_idx, b, g->gen, - bch2_data_types[g->data_type ?: data_type], + bch2_data_type_str(g->data_type ?: data_type), g->dirty_sectors, sectors)) { ret = -EIO; goto err; @@ -575,7 +575,7 @@ int bch2_check_bucket_ref(struct btree_trans *trans, "bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen\n" "while marking %s", ptr->dev, bucket_nr, b_gen, - bch2_data_types[bucket_data_type ?: ptr_data_type], + 
bch2_data_type_str(bucket_data_type ?: ptr_data_type), ptr->gen, (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); ret = -EIO; @@ -588,7 +588,7 @@ int bch2_check_bucket_ref(struct btree_trans *trans, "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n" "while marking %s", ptr->dev, bucket_nr, b_gen, - bch2_data_types[bucket_data_type ?: ptr_data_type], + bch2_data_type_str(bucket_data_type ?: ptr_data_type), ptr->gen, (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, k), buf.buf)); @@ -603,7 +603,7 @@ int bch2_check_bucket_ref(struct btree_trans *trans, "while marking %s", ptr->dev, bucket_nr, b_gen, *bucket_gen(ca, bucket_nr), - bch2_data_types[bucket_data_type ?: ptr_data_type], + bch2_data_type_str(bucket_data_type ?: ptr_data_type), ptr->gen, (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, k), buf.buf)); @@ -624,8 +624,8 @@ int bch2_check_bucket_ref(struct btree_trans *trans, "bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n" "while marking %s", ptr->dev, bucket_nr, b_gen, - bch2_data_types[bucket_data_type], - bch2_data_types[ptr_data_type], + bch2_data_type_str(bucket_data_type), + bch2_data_type_str(ptr_data_type), (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, k), buf.buf)); ret = -EIO; @@ -638,7 +638,7 @@ int bch2_check_bucket_ref(struct btree_trans *trans, "bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U32_MAX\n" "while marking %s", ptr->dev, bucket_nr, b_gen, - bch2_data_types[bucket_data_type ?: ptr_data_type], + bch2_data_type_str(bucket_data_type ?: ptr_data_type), bucket_sectors, sectors, (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, k), buf.buf)); @@ -677,11 +677,11 @@ void bch2_trans_fs_usage_revert(struct btree_trans *trans, BUG_ON(__update_replicas(c, dst, &d->r, -d->delta)); } - dst->nr_inodes -= deltas->nr_inodes; + dst->b.nr_inodes -= deltas->nr_inodes; for (i = 0; i < BCH_REPLICAS_MAX; i++) { added -= deltas->persistent_reserved[i]; - dst->reserved -= 
deltas->persistent_reserved[i]; + dst->b.reserved -= deltas->persistent_reserved[i]; dst->persistent_reserved[i] -= deltas->persistent_reserved[i]; } @@ -694,48 +694,25 @@ void bch2_trans_fs_usage_revert(struct btree_trans *trans, percpu_up_read(&c->mark_lock); } -int bch2_trans_fs_usage_apply(struct btree_trans *trans, - struct replicas_delta_list *deltas) +void bch2_trans_account_disk_usage_change(struct btree_trans *trans) { struct bch_fs *c = trans->c; + u64 disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0; static int warned_disk_usage = 0; bool warn = false; - u64 disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0; - struct replicas_delta *d, *d2; - struct replicas_delta *top = (void *) deltas->d + deltas->used; - struct bch_fs_usage *dst; - s64 added = 0, should_not_have_added; - unsigned i; percpu_down_read(&c->mark_lock); preempt_disable(); - dst = fs_usage_ptr(c, trans->journal_res.seq, false); - - for (d = deltas->d; d != top; d = replicas_delta_next(d)) { - switch (d->r.data_type) { - case BCH_DATA_btree: - case BCH_DATA_user: - case BCH_DATA_parity: - added += d->delta; - } + struct bch_fs_usage_base *dst = &fs_usage_ptr(c, trans->journal_res.seq, false)->b; + struct bch_fs_usage_base *src = &trans->fs_usage_delta; - if (__update_replicas(c, dst, &d->r, d->delta)) - goto need_mark; - } - - dst->nr_inodes += deltas->nr_inodes; - - for (i = 0; i < BCH_REPLICAS_MAX; i++) { - added += deltas->persistent_reserved[i]; - dst->reserved += deltas->persistent_reserved[i]; - dst->persistent_reserved[i] += deltas->persistent_reserved[i]; - } + s64 added = src->btree + src->data + src->reserved; /* * Not allowed to reduce sectors_available except by getting a * reservation: */ - should_not_have_added = added - (s64) disk_res_sectors; + s64 should_not_have_added = added - (s64) disk_res_sectors; if (unlikely(should_not_have_added > 0)) { u64 old, new, v = atomic64_read(&c->sectors_available); @@ -754,6 +731,13 @@ int 
bch2_trans_fs_usage_apply(struct btree_trans *trans, this_cpu_sub(*c->online_reserved, added); } + dst->hidden += src->hidden; + dst->btree += src->btree; + dst->data += src->data; + dst->cached += src->cached; + dst->reserved += src->reserved; + dst->nr_inodes += src->nr_inodes; + preempt_enable(); percpu_up_read(&c->mark_lock); @@ -761,6 +745,34 @@ int bch2_trans_fs_usage_apply(struct btree_trans *trans, bch2_trans_inconsistent(trans, "disk usage increased %lli more than %llu sectors reserved)", should_not_have_added, disk_res_sectors); +} + +int bch2_trans_fs_usage_apply(struct btree_trans *trans, + struct replicas_delta_list *deltas) +{ + struct bch_fs *c = trans->c; + struct replicas_delta *d, *d2; + struct replicas_delta *top = (void *) deltas->d + deltas->used; + struct bch_fs_usage *dst; + unsigned i; + + percpu_down_read(&c->mark_lock); + preempt_disable(); + dst = fs_usage_ptr(c, trans->journal_res.seq, false); + + for (d = deltas->d; d != top; d = replicas_delta_next(d)) + if (__update_replicas(c, dst, &d->r, d->delta)) + goto need_mark; + + dst->b.nr_inodes += deltas->nr_inodes; + + for (i = 0; i < BCH_REPLICAS_MAX; i++) { + dst->b.reserved += deltas->persistent_reserved[i]; + dst->persistent_reserved[i] += deltas->persistent_reserved[i]; + } + + preempt_enable(); + percpu_up_read(&c->mark_lock); return 0; need_mark: /* revert changes: */ @@ -1084,7 +1096,7 @@ static int __trigger_reservation(struct btree_trans *trans, struct bch_fs_usage *fs_usage = this_cpu_ptr(c->usage_gc); replicas = min(replicas, ARRAY_SIZE(fs_usage->persistent_reserved)); - fs_usage->reserved += sectors; + fs_usage->b.reserved += sectors; fs_usage->persistent_reserved[replicas - 1] += sectors; preempt_enable(); @@ -1130,9 +1142,9 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n" "while marking %s", iter.pos.inode, iter.pos.offset, a->v.gen, - bch2_data_types[a->v.data_type], - 
bch2_data_types[type], - bch2_data_types[type]); + bch2_data_type_str(a->v.data_type), + bch2_data_type_str(type), + bch2_data_type_str(type)); ret = -EIO; goto err; } diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index 2c95cc5d86be661c6d6a0783d366d5d8b8b919d7..6387e039f7897534e27c207dd3818dc4b6afb3b7 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -356,6 +356,8 @@ int bch2_trigger_reservation(struct btree_trans *, enum btree_id, unsigned, ret; \ }) +void bch2_trans_account_disk_usage_change(struct btree_trans *); + void bch2_trans_fs_usage_revert(struct btree_trans *, struct replicas_delta_list *); int bch2_trans_fs_usage_apply(struct btree_trans *, struct replicas_delta_list *); @@ -385,6 +387,21 @@ static inline bool is_superblock_bucket(struct bch_dev *ca, u64 b) return false; } +static inline const char *bch2_data_type_str(enum bch_data_type type) +{ + return type < BCH_DATA_NR + ? __bch2_data_types[type] + : "(invalid data type)"; +} + +static inline void bch2_prt_data_type(struct printbuf *out, enum bch_data_type type) +{ + if (type < BCH_DATA_NR) + prt_str(out, __bch2_data_types[type]); + else + prt_printf(out, "(invalid data type %u)", type); +} + /* disk reservations: */ static inline void bch2_disk_reservation_put(struct bch_fs *c, diff --git a/fs/bcachefs/buckets_types.h b/fs/bcachefs/buckets_types.h index 783f71017204cafa0277644a6d1b5564c779d366..6a31740222a7132e3f0735675ba63ed3402f00a8 100644 --- a/fs/bcachefs/buckets_types.h +++ b/fs/bcachefs/buckets_types.h @@ -45,23 +45,18 @@ struct bch_dev_usage { } d[BCH_DATA_NR]; }; -struct bch_fs_usage { - /* all fields are in units of 512 byte sectors: */ +struct bch_fs_usage_base { u64 hidden; u64 btree; u64 data; u64 cached; u64 reserved; u64 nr_inodes; +}; - /* XXX: add stats for compression ratio */ -#if 0 - u64 uncompressed; - u64 compressed; -#endif - - /* broken out: */ - +struct bch_fs_usage { + /* all fields are in units of 512 byte sectors: */ + struct bch_fs_usage_base 
b; u64 persistent_reserved[BCH_REPLICAS_MAX]; u64 replicas[]; }; diff --git a/fs/bcachefs/clock.c b/fs/bcachefs/clock.c index f41889093a2c7eacaa1723667fc7bb2af5d0f3aa..3636444511064b51e5a004b953eacf94e7c70d12 100644 --- a/fs/bcachefs/clock.c +++ b/fs/bcachefs/clock.c @@ -109,7 +109,7 @@ void bch2_kthread_io_clock_wait(struct io_clock *clock, if (cpu_timeout != MAX_SCHEDULE_TIMEOUT) mod_timer(&wait.cpu_timer, cpu_timeout + jiffies); - while (1) { + do { set_current_state(TASK_INTERRUPTIBLE); if (kthread && kthread_should_stop()) break; @@ -119,7 +119,7 @@ void bch2_kthread_io_clock_wait(struct io_clock *clock, schedule(); try_to_freeze(); - } + } while (0); __set_current_state(TASK_RUNNING); del_timer_sync(&wait.cpu_timer); diff --git a/fs/bcachefs/compress.h b/fs/bcachefs/compress.h index 607fd5e232c902dbb39f3dac84ea2e214e6b106c..58c2eb45570ff022764720f9beb10ecfa2926367 100644 --- a/fs/bcachefs/compress.h +++ b/fs/bcachefs/compress.h @@ -47,6 +47,14 @@ static inline enum bch_compression_type bch2_compression_opt_to_type(unsigned v) return __bch2_compression_opt_to_type[bch2_compression_decode(v).type]; } +static inline void bch2_prt_compression_type(struct printbuf *out, enum bch_compression_type type) +{ + if (type < BCH_COMPRESSION_TYPE_NR) + prt_str(out, __bch2_compression_types[type]); + else + prt_printf(out, "(invalid compression type %u)", type); +} + int bch2_bio_uncompress_inplace(struct bch_fs *, struct bio *, struct bch_extent_crc_unpacked *); int bch2_bio_uncompress(struct bch_fs *, struct bio *, struct bio *, diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 6f13477ff652e9e0552b9fbbb49009a5651d6d76..4150feca42a2e65e63a59234a3e806ebbd09e1ac 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -285,9 +285,7 @@ restart_drop_extra_replicas: k.k->p, bkey_start_pos(&insert->k)) ?: bch2_insert_snapshot_whiteouts(trans, m->btree_id, k.k->p, insert->k.p) ?: - bch2_bkey_set_needs_rebalance(c, insert, - 
op->opts.background_target, - op->opts.background_compression) ?: + bch2_bkey_set_needs_rebalance(c, insert, &op->opts) ?: bch2_trans_update(trans, &iter, insert, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: bch2_trans_commit(trans, &op->res, @@ -529,7 +527,7 @@ int bch2_data_update_init(struct btree_trans *trans, BCH_WRITE_DATA_ENCODED| BCH_WRITE_MOVE| m->data_opts.write_flags; - m->op.compression_opt = io_opts.background_compression ?: io_opts.compression; + m->op.compression_opt = background_compression(io_opts); m->op.watermark = m->data_opts.btree_insert_flags & BCH_WATERMARK_MASK; bkey_for_each_ptr(ptrs, ptr) diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index d6418948495f8392898178dd9b350b1829a24aae..cadda9bbe4a4cd67fe3b6f6f7aa5a5d93e496307 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -44,19 +44,19 @@ static bool bch2_btree_verify_replica(struct bch_fs *c, struct btree *b, return false; bio = bio_alloc_bioset(ca->disk_sb.bdev, - buf_pages(n_sorted, btree_bytes(c)), + buf_pages(n_sorted, btree_buf_bytes(b)), REQ_OP_READ|REQ_META, GFP_NOFS, &c->btree_bio); bio->bi_iter.bi_sector = pick.ptr.offset; - bch2_bio_map(bio, n_sorted, btree_bytes(c)); + bch2_bio_map(bio, n_sorted, btree_buf_bytes(b)); submit_bio_wait(bio); bio_put(bio); percpu_ref_put(&ca->io_ref); - memcpy(n_ondisk, n_sorted, btree_bytes(c)); + memcpy(n_ondisk, n_sorted, btree_buf_bytes(b)); v->written = 0; if (bch2_btree_node_read_done(c, ca, v, false, &saw_error) || saw_error) @@ -137,7 +137,7 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b) mutex_lock(&c->verify_lock); if (!c->verify_ondisk) { - c->verify_ondisk = kvpmalloc(btree_bytes(c), GFP_KERNEL); + c->verify_ondisk = kvpmalloc(btree_buf_bytes(b), GFP_KERNEL); if (!c->verify_ondisk) goto out; } @@ -199,19 +199,19 @@ void bch2_btree_node_ondisk_to_text(struct printbuf *out, struct bch_fs *c, return; } - n_ondisk = kvpmalloc(btree_bytes(c), GFP_KERNEL); + n_ondisk = kvpmalloc(btree_buf_bytes(b), GFP_KERNEL); 
if (!n_ondisk) { prt_printf(out, "memory allocation failure\n"); goto out; } bio = bio_alloc_bioset(ca->disk_sb.bdev, - buf_pages(n_ondisk, btree_bytes(c)), + buf_pages(n_ondisk, btree_buf_bytes(b)), REQ_OP_READ|REQ_META, GFP_NOFS, &c->btree_bio); bio->bi_iter.bi_sector = pick.ptr.offset; - bch2_bio_map(bio, n_ondisk, btree_bytes(c)); + bch2_bio_map(bio, n_ondisk, btree_buf_bytes(b)); ret = submit_bio_wait(bio); if (ret) { @@ -293,7 +293,7 @@ void bch2_btree_node_ondisk_to_text(struct printbuf *out, struct bch_fs *c, out: if (bio) bio_put(bio); - kvpfree(n_ondisk, btree_bytes(c)); + kvpfree(n_ondisk, btree_buf_bytes(b)); percpu_ref_put(&ca->io_ref); } diff --git a/fs/bcachefs/dirent_format.h b/fs/bcachefs/dirent_format.h new file mode 100644 index 0000000000000000000000000000000000000000..5e116b88e81463ceeee7b1df6a11121669e7b496 --- /dev/null +++ b/fs/bcachefs/dirent_format.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_DIRENT_FORMAT_H +#define _BCACHEFS_DIRENT_FORMAT_H + +/* + * Dirents (and xattrs) have to implement string lookups; since our b-tree + * doesn't support arbitrary length strings for the key, we instead index by a + * 64 bit hash (currently truncated sha1) of the string, stored in the offset + * field of the key - using linear probing to resolve hash collisions. This also + * provides us with the readdir cookie posix requires. 
+ * + * Linear probing requires us to use whiteouts for deletions, in the event of a + * collision: + */ + +struct bch_dirent { + struct bch_val v; + + /* Target inode number: */ + union { + __le64 d_inum; + struct { /* DT_SUBVOL */ + __le32 d_child_subvol; + __le32 d_parent_subvol; + }; + }; + + /* + * Copy of mode bits 12-15 from the target inode - so userspace can get + * the filetype without having to do a stat() + */ + __u8 d_type; + + __u8 d_name[]; +} __packed __aligned(8); + +#define DT_SUBVOL 16 +#define BCH_DT_MAX 17 + +#define BCH_NAME_MAX 512 + +#endif /* _BCACHEFS_DIRENT_FORMAT_H */ diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index d802bc63c8d0b4832bd8062ce827c8af180361e6..d503af2700247d8aa1257962c37df9b042ee55ec 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -190,7 +190,7 @@ static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans, a->v.stripe_redundancy, trans, "bucket %llu:%llu gen %u data type %s dirty_sectors %u: multiple stripes using same bucket (%u, %llu)", iter.pos.inode, iter.pos.offset, a->v.gen, - bch2_data_types[a->v.data_type], + bch2_data_type_str(a->v.data_type), a->v.dirty_sectors, a->v.stripe, s.k->p.offset)) { ret = -EIO; @@ -200,7 +200,7 @@ static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans, if (bch2_trans_inconsistent_on(data_type && a->v.dirty_sectors, trans, "bucket %llu:%llu gen %u data type %s dirty_sectors %u: data already in stripe bucket %llu", iter.pos.inode, iter.pos.offset, a->v.gen, - bch2_data_types[a->v.data_type], + bch2_data_type_str(a->v.data_type), a->v.dirty_sectors, s.k->p.offset)) { ret = -EIO; @@ -367,7 +367,7 @@ int bch2_trigger_stripe(struct btree_trans *trans, } } - if (!(flags & (BTREE_TRIGGER_TRANSACTIONAL|BTREE_TRIGGER_GC))) { + if (flags & BTREE_TRIGGER_ATOMIC) { struct stripe *m = genradix_ptr(&c->stripes, idx); if (!m) { diff --git a/fs/bcachefs/ec_format.h b/fs/bcachefs/ec_format.h new file mode 100644 index 
0000000000000000000000000000000000000000..44ce88ba08d712c3b388792a607676d27d85c2ae --- /dev/null +++ b/fs/bcachefs/ec_format.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_EC_FORMAT_H +#define _BCACHEFS_EC_FORMAT_H + +struct bch_stripe { + struct bch_val v; + __le16 sectors; + __u8 algorithm; + __u8 nr_blocks; + __u8 nr_redundant; + + __u8 csum_granularity_bits; + __u8 csum_type; + __u8 pad; + + struct bch_extent_ptr ptrs[]; +} __packed __aligned(8); + +#endif /* _BCACHEFS_EC_FORMAT_H */ diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 82ec056f4cdbb1f4e4234fce274939b61b7a5015..61395b113df9bdad67c0da7d2a4cc4f99664bc4e 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -8,6 +8,7 @@ #include "bcachefs.h" #include "bkey_methods.h" +#include "btree_cache.h" #include "btree_gc.h" #include "btree_io.h" #include "btree_iter.h" @@ -1018,12 +1019,12 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, struct bch_extent_crc_unpacked crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry)); - prt_printf(out, "crc: c_size %u size %u offset %u nonce %u csum %s compress %s", + prt_printf(out, "crc: c_size %u size %u offset %u nonce %u csum %s compress ", crc.compressed_size, crc.uncompressed_size, crc.offset, crc.nonce, - bch2_csum_types[crc.csum_type], - bch2_compression_types[crc.compression_type]); + bch2_csum_types[crc.csum_type]); + bch2_prt_compression_type(out, crc.compression_type); break; } case BCH_EXTENT_ENTRY_stripe_ptr: { @@ -1334,10 +1335,12 @@ bool bch2_bkey_needs_rebalance(struct bch_fs *c, struct bkey_s_c k) } int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bkey_i *_k, - unsigned target, unsigned compression) + struct bch_io_opts *opts) { struct bkey_s k = bkey_i_to_s(_k); struct bch_extent_rebalance *r; + unsigned target = opts->background_target; + unsigned compression = background_compression(*opts); bool needs_rebalance; if (!bkey_extent_is_direct_data(k.k)) diff --git 
a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index a855c94d43ddb4f770f69807401f6d9dd5f66cbf..6bf839d69e84e6e24ed3bf2bf611177fc04676e1 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -708,7 +708,7 @@ unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *, struct bkey_s_c, bool bch2_bkey_needs_rebalance(struct bch_fs *, struct bkey_s_c); int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bkey_i *, - unsigned, unsigned); + struct bch_io_opts *); /* Generic extent code: */ diff --git a/fs/bcachefs/extents_format.h b/fs/bcachefs/extents_format.h new file mode 100644 index 0000000000000000000000000000000000000000..3bd2fdbb08174c395d2f80b94fef388e784c2ab0 --- /dev/null +++ b/fs/bcachefs/extents_format.h @@ -0,0 +1,295 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_EXTENTS_FORMAT_H +#define _BCACHEFS_EXTENTS_FORMAT_H + +/* + * In extent bkeys, the value is a list of pointers (bch_extent_ptr), optionally + * preceded by checksum/compression information (bch_extent_crc32 or + * bch_extent_crc64). + * + * One major determining factor in the format of extents is how we handle and + * represent extents that have been partially overwritten and thus trimmed: + * + * If an extent is not checksummed or compressed, when the extent is trimmed we + * don't have to remember the extent we originally allocated and wrote: we can + * merely adjust ptr->offset to point to the start of the data that is currently + * live. The size field in struct bkey records the current (live) size of the + * extent, and is also used to mean "size of region on disk that we point to" in + * this case. + * + * Thus an extent that is not checksummed or compressed will consist only of a + * list of bch_extent_ptrs, with none of the fields in + * bch_extent_crc32/bch_extent_crc64. 
+ * + * When an extent is checksummed or compressed, it's not possible to read only + * the data that is currently live: we have to read the entire extent that was + * originally written, and then return only the part of the extent that is + * currently live. + * + * Thus, in addition to the current size of the extent in struct bkey, we need + * to store the size of the originally allocated space - this is the + * compressed_size and uncompressed_size fields in bch_extent_crc32/64. Also, + * when the extent is trimmed, instead of modifying the offset field of the + * pointer, we keep a second smaller offset field - "offset into the original + * extent of the currently live region". + * + * The other major determining factor is replication and data migration: + * + * Each pointer may have its own bch_extent_crc32/64. When doing a replicated + * write, we will initially write all the replicas in the same format, with the + * same checksum type and compression format - however, when copygc runs later (or + * tiering/cache promotion, anything that moves data), it is not in general + * going to rewrite all the pointers at once - one of the replicas may be in a + * bucket on one device that has very little fragmentation while another lives + * in a bucket that has become heavily fragmented, and thus is being rewritten + * sooner than the rest. + * + * Thus it will only move a subset of the pointers (or in the case of + * tiering/cache promotion perhaps add a single pointer without dropping any + * current pointers), and if the extent has been partially overwritten it must + * write only the currently live portion (or copygc would not be able to reduce + * fragmentation!) - which necessitates a different bch_extent_crc format for + * the new pointer. + * + * But in the interests of space efficiency, we don't want to store one + * bch_extent_crc for each pointer if we don't have to. 
+ * + * Thus, a bch_extent consists of bch_extent_crc32s, bch_extent_crc64s, and + * bch_extent_ptrs appended arbitrarily one after the other. We determine the + * type of a given entry with a scheme similar to utf8 (except we're encoding a + * type, not a size), encoding the type in the position of the first set bit: + * + * bch_extent_crc32 - 0b1 + * bch_extent_ptr - 0b10 + * bch_extent_crc64 - 0b100 + * + * We do it this way because bch_extent_crc32 is _very_ constrained on bits (and + * bch_extent_crc64 is the least constrained). + * + * Then, each bch_extent_crc32/64 applies to the pointers that follow after it, + * until the next bch_extent_crc32/64. + * + * If there are no bch_extent_crcs preceding a bch_extent_ptr, then that pointer + * is neither checksummed nor compressed. + */ + +#define BCH_EXTENT_ENTRY_TYPES() \ + x(ptr, 0) \ + x(crc32, 1) \ + x(crc64, 2) \ + x(crc128, 3) \ + x(stripe_ptr, 4) \ + x(rebalance, 5) +#define BCH_EXTENT_ENTRY_MAX 6 + +enum bch_extent_entry_type { +#define x(f, n) BCH_EXTENT_ENTRY_##f = n, + BCH_EXTENT_ENTRY_TYPES() +#undef x +}; + +/* Compressed/uncompressed size are stored biased by 1: */ +struct bch_extent_crc32 { +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u32 type:2, + _compressed_size:7, + _uncompressed_size:7, + offset:7, + _unused:1, + csum_type:4, + compression_type:4; + __u32 csum; +#elif defined (__BIG_ENDIAN_BITFIELD) + __u32 csum; + __u32 compression_type:4, + csum_type:4, + _unused:1, + offset:7, + _uncompressed_size:7, + _compressed_size:7, + type:2; +#endif +} __packed __aligned(8); + +#define CRC32_SIZE_MAX (1U << 7) +#define CRC32_NONCE_MAX 0 + +struct bch_extent_crc64 { +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u64 type:3, + _compressed_size:9, + _uncompressed_size:9, + offset:9, + nonce:10, + csum_type:4, + compression_type:4, + csum_hi:16; +#elif defined (__BIG_ENDIAN_BITFIELD) + __u64 csum_hi:16, + compression_type:4, + csum_type:4, + nonce:10, + offset:9, + _uncompressed_size:9, + _compressed_size:9, + 
type:3; +#endif + __u64 csum_lo; +} __packed __aligned(8); + +#define CRC64_SIZE_MAX (1U << 9) +#define CRC64_NONCE_MAX ((1U << 10) - 1) + +struct bch_extent_crc128 { +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u64 type:4, + _compressed_size:13, + _uncompressed_size:13, + offset:13, + nonce:13, + csum_type:4, + compression_type:4; +#elif defined (__BIG_ENDIAN_BITFIELD) + __u64 compression_type:4, + csum_type:4, + nonce:13, + offset:13, + _uncompressed_size:13, + _compressed_size:13, + type:4; +#endif + struct bch_csum csum; +} __packed __aligned(8); + +#define CRC128_SIZE_MAX (1U << 13) +#define CRC128_NONCE_MAX ((1U << 13) - 1) + +/* + * @reservation - pointer hasn't been written to, just reserved + */ +struct bch_extent_ptr { +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u64 type:1, + cached:1, + unused:1, + unwritten:1, + offset:44, /* 8 petabytes */ + dev:8, + gen:8; +#elif defined (__BIG_ENDIAN_BITFIELD) + __u64 gen:8, + dev:8, + offset:44, + unwritten:1, + unused:1, + cached:1, + type:1; +#endif +} __packed __aligned(8); + +struct bch_extent_stripe_ptr { +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u64 type:5, + block:8, + redundancy:4, + idx:47; +#elif defined (__BIG_ENDIAN_BITFIELD) + __u64 idx:47, + redundancy:4, + block:8, + type:5; +#endif +}; + +struct bch_extent_rebalance { +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u64 type:6, + unused:34, + compression:8, /* enum bch_compression_opt */ + target:16; +#elif defined (__BIG_ENDIAN_BITFIELD) + __u64 target:16, + compression:8, + unused:34, + type:6; +#endif +}; + +union bch_extent_entry { +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || __BITS_PER_LONG == 64 + unsigned long type; +#elif __BITS_PER_LONG == 32 + struct { + unsigned long pad; + unsigned long type; + }; +#else +#error edit for your odd byteorder. 
+#endif + +#define x(f, n) struct bch_extent_##f f; + BCH_EXTENT_ENTRY_TYPES() +#undef x +}; + +struct bch_btree_ptr { + struct bch_val v; + + __u64 _data[0]; + struct bch_extent_ptr start[]; +} __packed __aligned(8); + +struct bch_btree_ptr_v2 { + struct bch_val v; + + __u64 mem_ptr; + __le64 seq; + __le16 sectors_written; + __le16 flags; + struct bpos min_key; + __u64 _data[0]; + struct bch_extent_ptr start[]; +} __packed __aligned(8); + +LE16_BITMASK(BTREE_PTR_RANGE_UPDATED, struct bch_btree_ptr_v2, flags, 0, 1); + +struct bch_extent { + struct bch_val v; + + __u64 _data[0]; + union bch_extent_entry start[]; +} __packed __aligned(8); + +/* Maximum size (in u64s) a single pointer could be: */ +#define BKEY_EXTENT_PTR_U64s_MAX\ + ((sizeof(struct bch_extent_crc128) + \ + sizeof(struct bch_extent_ptr)) / sizeof(__u64)) + +/* Maximum possible size of an entire extent value: */ +#define BKEY_EXTENT_VAL_U64s_MAX \ + (1 + BKEY_EXTENT_PTR_U64s_MAX * (BCH_REPLICAS_MAX + 1)) + +/* * Maximum possible size of an entire extent, key + value: */ +#define BKEY_EXTENT_U64s_MAX (BKEY_U64s + BKEY_EXTENT_VAL_U64s_MAX) + +/* Btree pointers don't carry around checksums: */ +#define BKEY_BTREE_PTR_VAL_U64s_MAX \ + ((sizeof(struct bch_btree_ptr_v2) + \ + sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX) / sizeof(__u64)) +#define BKEY_BTREE_PTR_U64s_MAX \ + (BKEY_U64s + BKEY_BTREE_PTR_VAL_U64s_MAX) + +struct bch_reservation { + struct bch_val v; + + __le32 generation; + __u8 nr_replicas; + __u8 pad[3]; +} __packed __aligned(8); + +struct bch_inline_data { + struct bch_val v; + u8 data[]; +}; + +#endif /* _BCACHEFS_EXTENTS_FORMAT_H */ diff --git a/fs/bcachefs/eytzinger.h b/fs/bcachefs/eytzinger.h index 9637f636e32d508571a5908c536b48b8e3ed792c..b04750dbf870bc78c95ece35d363e3a4c0936b50 100644 --- a/fs/bcachefs/eytzinger.h +++ b/fs/bcachefs/eytzinger.h @@ -156,7 +156,7 @@ static inline unsigned inorder_to_eytzinger1(unsigned i, unsigned size) } #define eytzinger1_for_each(_i, _size) \ - for 
((_i) = eytzinger1_first((_size)); \ + for (unsigned (_i) = eytzinger1_first((_size)); \ (_i) != 0; \ (_i) = eytzinger1_next((_i), (_size))) @@ -227,7 +227,7 @@ static inline unsigned inorder_to_eytzinger0(unsigned i, unsigned size) } #define eytzinger0_for_each(_i, _size) \ - for ((_i) = eytzinger0_first((_size)); \ + for (unsigned (_i) = eytzinger0_first((_size)); \ (_i) != -1; \ (_i) = eytzinger0_next((_i), (_size))) diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c index fdd57c5785c9cebf609959fb753ee30e55e85b92..e3b219e19e1008ccfe1ff61e966115795f9c1831 100644 --- a/fs/bcachefs/fs-io-direct.c +++ b/fs/bcachefs/fs-io-direct.c @@ -77,6 +77,10 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter) bch2_inode_opts_get(&opts, c, &inode->ei_inode); + /* bios must be 512 byte aligned: */ + if ((offset|iter->count) & (SECTOR_SIZE - 1)) + return -EINVAL; + ret = min_t(loff_t, iter->count, max_t(loff_t, 0, i_size_read(&inode->v) - offset)); diff --git a/fs/bcachefs/fs-io-pagecache.c b/fs/bcachefs/fs-io-pagecache.c index ff664fd0d8ef80e8b4816d7c430e87d41759b498..d359aa9b33b828342bd466b899713f401d939b30 100644 --- a/fs/bcachefs/fs-io-pagecache.c +++ b/fs/bcachefs/fs-io-pagecache.c @@ -309,39 +309,49 @@ void bch2_mark_pagecache_unallocated(struct bch_inode_info *inode, } } -void bch2_mark_pagecache_reserved(struct bch_inode_info *inode, - u64 start, u64 end) +int bch2_mark_pagecache_reserved(struct bch_inode_info *inode, + u64 *start, u64 end, + bool nonblocking) { struct bch_fs *c = inode->v.i_sb->s_fs_info; - pgoff_t index = start >> PAGE_SECTORS_SHIFT; + pgoff_t index = *start >> PAGE_SECTORS_SHIFT; pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT; struct folio_batch fbatch; s64 i_sectors_delta = 0; - unsigned i, j; + int ret = 0; - if (end <= start) - return; + if (end <= *start) + return 0; folio_batch_init(&fbatch); while (filemap_get_folios(inode->v.i_mapping, &index, end_index, &fbatch)) { - for (i = 0; i < 
folio_batch_count(&fbatch); i++) { + for (unsigned i = 0; i < folio_batch_count(&fbatch); i++) { struct folio *folio = fbatch.folios[i]; + + if (!nonblocking) + folio_lock(folio); + else if (!folio_trylock(folio)) { + folio_batch_release(&fbatch); + ret = -EAGAIN; + break; + } + u64 folio_start = folio_sector(folio); u64 folio_end = folio_end_sector(folio); - unsigned folio_offset = max(start, folio_start) - folio_start; - unsigned folio_len = min(end, folio_end) - folio_offset - folio_start; - struct bch_folio *s; BUG_ON(end <= folio_start); - folio_lock(folio); - s = bch2_folio(folio); + *start = min(end, folio_end); + struct bch_folio *s = bch2_folio(folio); if (s) { + unsigned folio_offset = max(*start, folio_start) - folio_start; + unsigned folio_len = min(end, folio_end) - folio_offset - folio_start; + spin_lock(&s->lock); - for (j = folio_offset; j < folio_offset + folio_len; j++) { + for (unsigned j = folio_offset; j < folio_offset + folio_len; j++) { i_sectors_delta -= s->s[j].state == SECTOR_dirty; bch2_folio_sector_set(folio, s, j, folio_sector_reserve(s->s[j].state)); @@ -356,6 +366,7 @@ void bch2_mark_pagecache_reserved(struct bch_inode_info *inode, } bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta); + return ret; } static inline unsigned sectors_to_reserve(struct bch_folio_sector *s, diff --git a/fs/bcachefs/fs-io-pagecache.h b/fs/bcachefs/fs-io-pagecache.h index 27f712ae37a68209275cc3b2955a542314e80e68..8cbaba6565b4493695d679fe41553c197468c752 100644 --- a/fs/bcachefs/fs-io-pagecache.h +++ b/fs/bcachefs/fs-io-pagecache.h @@ -143,7 +143,7 @@ int bch2_folio_set(struct bch_fs *, subvol_inum, struct folio **, unsigned); void bch2_bio_page_state_set(struct bio *, struct bkey_s_c); void bch2_mark_pagecache_unallocated(struct bch_inode_info *, u64, u64); -void bch2_mark_pagecache_reserved(struct bch_inode_info *, u64, u64); +int bch2_mark_pagecache_reserved(struct bch_inode_info *, u64 *, u64, bool); int bch2_get_folio_disk_reservation(struct bch_fs *, 
struct bch_inode_info *, diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 98bd5babab193bec842dce20b0783e6c958ac5bf..dc52918d06ef3f91c30484822a5a170b08543f9c 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -675,8 +675,11 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode, bch2_i_sectors_acct(c, inode, "a_res, i_sectors_delta); - drop_locks_do(trans, - (bch2_mark_pagecache_reserved(inode, hole_start, iter.pos.offset), 0)); + if (bch2_mark_pagecache_reserved(inode, &hole_start, + iter.pos.offset, true)) + drop_locks_do(trans, + bch2_mark_pagecache_reserved(inode, &hole_start, + iter.pos.offset, false)); bkey_err: bch2_quota_reservation_put(c, inode, "a_res); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index 1cbc5807bc807ba700875152d08f766f466ec40a..3a4c24c28e7fa06deff38f6bb0b240a5daacda8c 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -337,11 +337,12 @@ static long __bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp, if (arg.flags & BCH_SUBVOL_SNAPSHOT_RO) create_flags |= BCH_CREATE_SNAPSHOT_RO; - /* why do we need this lock? 
*/ - down_read(&c->vfs_sb->s_umount); - - if (arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) + if (arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) { + /* sync_inodes_sb enforce s_umount is locked */ + down_read(&c->vfs_sb->s_umount); sync_inodes_sb(c->vfs_sb); + up_read(&c->vfs_sb->s_umount); + } retry: if (arg.src_ptr) { error = user_path_at(arg.dirfd, @@ -425,8 +426,6 @@ err2: goto retry; } err1: - up_read(&c->vfs_sb->s_umount); - return error; } diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 37dce96f48ac42d28b98d99e75a77b049e04de8f..086f0090b03a4015388dce49388ba5951940cb0a 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -506,22 +506,33 @@ fsck_err: static void __bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked *inode) { - prt_printf(out, "mode=%o ", inode->bi_mode); + printbuf_indent_add(out, 2); + prt_printf(out, "mode=%o", inode->bi_mode); + prt_newline(out); prt_str(out, "flags="); prt_bitflags(out, bch2_inode_flag_strs, inode->bi_flags & ((1U << 20) - 1)); prt_printf(out, " (%x)", inode->bi_flags); + prt_newline(out); - prt_printf(out, " journal_seq=%llu bi_size=%llu bi_sectors=%llu bi_version=%llu", - inode->bi_journal_seq, - inode->bi_size, - inode->bi_sectors, - inode->bi_version); + prt_printf(out, "journal_seq=%llu", inode->bi_journal_seq); + prt_newline(out); + + prt_printf(out, "bi_size=%llu", inode->bi_size); + prt_newline(out); + + prt_printf(out, "bi_sectors=%llu", inode->bi_sectors); + prt_newline(out); + + prt_newline(out); + prt_printf(out, "bi_version=%llu", inode->bi_version); #define x(_name, _bits) \ - prt_printf(out, " "#_name "=%llu", (u64) inode->_name); + prt_printf(out, #_name "=%llu", (u64) inode->_name); \ + prt_newline(out); BCH_INODE_FIELDS_v3() #undef x + printbuf_indent_sub(out, 2); } void bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked *inode) @@ -587,7 +598,7 @@ int bch2_trigger_inode(struct btree_trans *trans, } } - if (!(flags & BTREE_TRIGGER_TRANSACTIONAL) && 
(flags & BTREE_TRIGGER_INSERT)) { + if ((flags & BTREE_TRIGGER_ATOMIC) && (flags & BTREE_TRIGGER_INSERT)) { BUG_ON(!trans->journal_res.seq); bkey_s_to_inode_v3(new).v->bi_journal_seq = cpu_to_le64(trans->journal_res.seq); @@ -597,7 +608,7 @@ int bch2_trigger_inode(struct btree_trans *trans, struct bch_fs *c = trans->c; percpu_down_read(&c->mark_lock); - this_cpu_add(c->usage_gc->nr_inodes, nr); + this_cpu_add(c->usage_gc->b.nr_inodes, nr); percpu_up_read(&c->mark_lock); } diff --git a/fs/bcachefs/inode_format.h b/fs/bcachefs/inode_format.h new file mode 100644 index 0000000000000000000000000000000000000000..83d107331edf473ab5df16d9222c4a7e16c56c4a --- /dev/null +++ b/fs/bcachefs/inode_format.h @@ -0,0 +1,166 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_INODE_FORMAT_H +#define _BCACHEFS_INODE_FORMAT_H + +#define BLOCKDEV_INODE_MAX 4096 +#define BCACHEFS_ROOT_INO 4096 + +struct bch_inode { + struct bch_val v; + + __le64 bi_hash_seed; + __le32 bi_flags; + __le16 bi_mode; + __u8 fields[]; +} __packed __aligned(8); + +struct bch_inode_v2 { + struct bch_val v; + + __le64 bi_journal_seq; + __le64 bi_hash_seed; + __le64 bi_flags; + __le16 bi_mode; + __u8 fields[]; +} __packed __aligned(8); + +struct bch_inode_v3 { + struct bch_val v; + + __le64 bi_journal_seq; + __le64 bi_hash_seed; + __le64 bi_flags; + __le64 bi_sectors; + __le64 bi_size; + __le64 bi_version; + __u8 fields[]; +} __packed __aligned(8); + +#define INODEv3_FIELDS_START_INITIAL 6 +#define INODEv3_FIELDS_START_CUR (offsetof(struct bch_inode_v3, fields) / sizeof(__u64)) + +struct bch_inode_generation { + struct bch_val v; + + __le32 bi_generation; + __le32 pad; +} __packed __aligned(8); + +/* + * bi_subvol and bi_parent_subvol are only set for subvolume roots: + */ + +#define BCH_INODE_FIELDS_v2() \ + x(bi_atime, 96) \ + x(bi_ctime, 96) \ + x(bi_mtime, 96) \ + x(bi_otime, 96) \ + x(bi_size, 64) \ + x(bi_sectors, 64) \ + x(bi_uid, 32) \ + x(bi_gid, 32) \ + x(bi_nlink, 32) \ + x(bi_generation, 
32) \ + x(bi_dev, 32) \ + x(bi_data_checksum, 8) \ + x(bi_compression, 8) \ + x(bi_project, 32) \ + x(bi_background_compression, 8) \ + x(bi_data_replicas, 8) \ + x(bi_promote_target, 16) \ + x(bi_foreground_target, 16) \ + x(bi_background_target, 16) \ + x(bi_erasure_code, 16) \ + x(bi_fields_set, 16) \ + x(bi_dir, 64) \ + x(bi_dir_offset, 64) \ + x(bi_subvol, 32) \ + x(bi_parent_subvol, 32) + +#define BCH_INODE_FIELDS_v3() \ + x(bi_atime, 96) \ + x(bi_ctime, 96) \ + x(bi_mtime, 96) \ + x(bi_otime, 96) \ + x(bi_uid, 32) \ + x(bi_gid, 32) \ + x(bi_nlink, 32) \ + x(bi_generation, 32) \ + x(bi_dev, 32) \ + x(bi_data_checksum, 8) \ + x(bi_compression, 8) \ + x(bi_project, 32) \ + x(bi_background_compression, 8) \ + x(bi_data_replicas, 8) \ + x(bi_promote_target, 16) \ + x(bi_foreground_target, 16) \ + x(bi_background_target, 16) \ + x(bi_erasure_code, 16) \ + x(bi_fields_set, 16) \ + x(bi_dir, 64) \ + x(bi_dir_offset, 64) \ + x(bi_subvol, 32) \ + x(bi_parent_subvol, 32) \ + x(bi_nocow, 8) + +/* subset of BCH_INODE_FIELDS */ +#define BCH_INODE_OPTS() \ + x(data_checksum, 8) \ + x(compression, 8) \ + x(project, 32) \ + x(background_compression, 8) \ + x(data_replicas, 8) \ + x(promote_target, 16) \ + x(foreground_target, 16) \ + x(background_target, 16) \ + x(erasure_code, 16) \ + x(nocow, 8) + +enum inode_opt_id { +#define x(name, ...) 
\ + Inode_opt_##name, + BCH_INODE_OPTS() +#undef x + Inode_opt_nr, +}; + +#define BCH_INODE_FLAGS() \ + x(sync, 0) \ + x(immutable, 1) \ + x(append, 2) \ + x(nodump, 3) \ + x(noatime, 4) \ + x(i_size_dirty, 5) \ + x(i_sectors_dirty, 6) \ + x(unlinked, 7) \ + x(backptr_untrusted, 8) + +/* bits 20+ reserved for packed fields below: */ + +enum bch_inode_flags { +#define x(t, n) BCH_INODE_##t = 1U << n, + BCH_INODE_FLAGS() +#undef x +}; + +enum __bch_inode_flags { +#define x(t, n) __BCH_INODE_##t = n, + BCH_INODE_FLAGS() +#undef x +}; + +LE32_BITMASK(INODE_STR_HASH, struct bch_inode, bi_flags, 20, 24); +LE32_BITMASK(INODE_NR_FIELDS, struct bch_inode, bi_flags, 24, 31); +LE32_BITMASK(INODE_NEW_VARINT, struct bch_inode, bi_flags, 31, 32); + +LE64_BITMASK(INODEv2_STR_HASH, struct bch_inode_v2, bi_flags, 20, 24); +LE64_BITMASK(INODEv2_NR_FIELDS, struct bch_inode_v2, bi_flags, 24, 31); + +LE64_BITMASK(INODEv3_STR_HASH, struct bch_inode_v3, bi_flags, 20, 24); +LE64_BITMASK(INODEv3_NR_FIELDS, struct bch_inode_v3, bi_flags, 24, 31); + +LE64_BITMASK(INODEv3_FIELDS_START, + struct bch_inode_v3, bi_flags, 31, 36); +LE64_BITMASK(INODEv3_MODE, struct bch_inode_v3, bi_flags, 36, 52); + +#endif /* _BCACHEFS_INODE_FORMAT_H */ diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c index ca6d5f516aa2be80824e7479e73d1cbfc2607117..1baf78594ccaf85d7d89fea4fc938a7f700d6dc0 100644 --- a/fs/bcachefs/io_misc.c +++ b/fs/bcachefs/io_misc.c @@ -442,9 +442,7 @@ case LOGGED_OP_FINSERT_shift_extents: op->v.pos = cpu_to_le64(insert ? 
bkey_start_offset(&delete.k) : delete.k.p.offset); - ret = bch2_bkey_set_needs_rebalance(c, copy, - opts.background_target, - opts.background_compression) ?: + ret = bch2_bkey_set_needs_rebalance(c, copy, &opts) ?: bch2_btree_insert_trans(trans, BTREE_ID_extents, &delete, 0) ?: bch2_btree_insert_trans(trans, BTREE_ID_extents, copy, 0) ?: bch2_logged_op_update(trans, &op->k_i) ?: diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index 33c0e783d54697b50c490309726b49eacb410189..ef3a53f9045af2591ab1f9e272dd9d6151250444 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -362,9 +362,7 @@ static int bch2_write_index_default(struct bch_write_op *op) bkey_start_pos(&sk.k->k), BTREE_ITER_SLOTS|BTREE_ITER_INTENT); - ret = bch2_bkey_set_needs_rebalance(c, sk.k, - op->opts.background_target, - op->opts.background_compression) ?: + ret = bch2_bkey_set_needs_rebalance(c, sk.k, &op->opts) ?: bch2_extent_update(trans, inum, &iter, sk.k, &op->res, op->new_i_size, &op->i_sectors_delta, @@ -1447,10 +1445,11 @@ err: op->flags |= BCH_WRITE_DONE; if (ret < 0) { - bch_err_inum_offset_ratelimited(c, - op->pos.inode, - op->pos.offset << 9, - "%s(): error: %s", __func__, bch2_err_str(ret)); + if (!(op->flags & BCH_WRITE_ALLOC_NOWAIT)) + bch_err_inum_offset_ratelimited(c, + op->pos.inode, + op->pos.offset << 9, + "%s(): error: %s", __func__, bch2_err_str(ret)); op->error = ret; break; } diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 8538ef34f62bc54e8bc570acbe793e4771745247..d71d26e39521e4410a90cb6bf3e21df360e6c201 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -27,6 +27,47 @@ static const char * const bch2_journal_errors[] = { NULL }; +static void bch2_journal_buf_to_text(struct printbuf *out, struct journal *j, u64 seq) +{ + union journal_res_state s = READ_ONCE(j->reservations); + unsigned i = seq & JOURNAL_BUF_MASK; + struct journal_buf *buf = j->buf + i; + + prt_printf(out, "seq:"); + prt_tab(out); + prt_printf(out, "%llu", 
seq); + prt_newline(out); + printbuf_indent_add(out, 2); + + prt_printf(out, "refcount:"); + prt_tab(out); + prt_printf(out, "%u", journal_state_count(s, i)); + prt_newline(out); + + prt_printf(out, "size:"); + prt_tab(out); + prt_human_readable_u64(out, vstruct_bytes(buf->data)); + prt_newline(out); + + prt_printf(out, "expires"); + prt_tab(out); + prt_printf(out, "%li jiffies", buf->expires - jiffies); + prt_newline(out); + + printbuf_indent_sub(out, 2); +} + +static void bch2_journal_bufs_to_text(struct printbuf *out, struct journal *j) +{ + if (!out->nr_tabstops) + printbuf_tabstop_push(out, 24); + + for (u64 seq = journal_last_unwritten_seq(j); + seq <= journal_cur_seq(j); + seq++) + bch2_journal_buf_to_text(out, j, seq); +} + static inline bool journal_seq_unwritten(struct journal *j, u64 seq) { return seq > j->seq_ondisk; @@ -156,7 +197,7 @@ void bch2_journal_buf_put_final(struct journal *j, u64 seq, bool write) * We don't close a journal_buf until the next journal_buf is finished writing, * and can be opened again - this also initializes the next journal_buf: */ -static void __journal_entry_close(struct journal *j, unsigned closed_val) +static void __journal_entry_close(struct journal *j, unsigned closed_val, bool trace) { struct bch_fs *c = container_of(j, struct bch_fs, journal); struct journal_buf *buf = journal_cur_buf(j); @@ -185,7 +226,17 @@ static void __journal_entry_close(struct journal *j, unsigned closed_val) /* Close out old buffer: */ buf->data->u64s = cpu_to_le32(old.cur_entry_offset); - trace_journal_entry_close(c, vstruct_bytes(buf->data)); + if (trace_journal_entry_close_enabled() && trace) { + struct printbuf pbuf = PRINTBUF; + pbuf.atomic++; + + prt_str(&pbuf, "entry size: "); + prt_human_readable_u64(&pbuf, vstruct_bytes(buf->data)); + prt_newline(&pbuf); + bch2_prt_task_backtrace(&pbuf, current, 1); + trace_journal_entry_close(c, pbuf.buf); + printbuf_exit(&pbuf); + } sectors = vstruct_blocks_plus(buf->data, c->block_bits, 
buf->u64s_reserved) << c->block_bits; @@ -225,7 +276,7 @@ static void __journal_entry_close(struct journal *j, unsigned closed_val) void bch2_journal_halt(struct journal *j) { spin_lock(&j->lock); - __journal_entry_close(j, JOURNAL_ENTRY_ERROR_VAL); + __journal_entry_close(j, JOURNAL_ENTRY_ERROR_VAL, true); if (!j->err_seq) j->err_seq = journal_cur_seq(j); journal_wake(j); @@ -239,7 +290,7 @@ static bool journal_entry_want_write(struct journal *j) /* Don't close it yet if we already have a write in flight: */ if (ret) - __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL); + __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL, true); else if (nr_unwritten_journal_entries(j)) { struct journal_buf *buf = journal_cur_buf(j); @@ -406,7 +457,7 @@ static void journal_write_work(struct work_struct *work) if (delta > 0) mod_delayed_work(c->io_complete_wq, &j->write_work, delta); else - __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL); + __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL, true); unlock: spin_unlock(&j->lock); } @@ -463,13 +514,21 @@ retry: buf->buf_size < JOURNAL_ENTRY_SIZE_MAX) j->buf_size_want = max(j->buf_size_want, buf->buf_size << 1); - __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL); + __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL, false); ret = journal_entry_open(j); if (ret == JOURNAL_ERR_max_in_flight) { track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight], &j->max_in_flight_start, true); - trace_and_count(c, journal_entry_full, c); + if (trace_journal_entry_full_enabled()) { + struct printbuf buf = PRINTBUF; + buf.atomic++; + + bch2_journal_bufs_to_text(&buf, j); + trace_journal_entry_full(c, buf.buf); + printbuf_exit(&buf); + } + count_event(c, journal_entry_full); } unlock: can_discard = j->can_discard; @@ -549,7 +608,7 @@ void bch2_journal_entry_res_resize(struct journal *j, /* * Not enough room in current journal entry, have to flush it: */ - __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL); + __journal_entry_close(j, 
JOURNAL_ENTRY_CLOSED_VAL, true); } else { journal_cur_buf(j)->u64s_reserved += d; } @@ -606,7 +665,7 @@ recheck_need_open: struct journal_res res = { 0 }; if (journal_entry_is_open(j)) - __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL); + __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL, true); spin_unlock(&j->lock); @@ -786,7 +845,7 @@ static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct jou if (buf->need_flush_to_write_buffer) { if (seq == journal_cur_seq(j)) - __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL); + __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL, true); union journal_res_state s; s.v = atomic64_read_acquire(&j->reservations.counter); @@ -1339,35 +1398,9 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) } prt_newline(out); - - for (u64 seq = journal_cur_seq(j); - seq >= journal_last_unwritten_seq(j); - --seq) { - unsigned i = seq & JOURNAL_BUF_MASK; - - prt_printf(out, "unwritten entry:"); - prt_tab(out); - prt_printf(out, "%llu", seq); - prt_newline(out); - printbuf_indent_add(out, 2); - - prt_printf(out, "refcount:"); - prt_tab(out); - prt_printf(out, "%u", journal_state_count(s, i)); - prt_newline(out); - - prt_printf(out, "sectors:"); - prt_tab(out); - prt_printf(out, "%u", j->buf[i].sectors); - prt_newline(out); - - prt_printf(out, "expires"); - prt_tab(out); - prt_printf(out, "%li jiffies", j->buf[i].expires - jiffies); - prt_newline(out); - - printbuf_indent_sub(out, 2); - } + prt_printf(out, "unwritten entries:"); + prt_newline(out); + bch2_journal_bufs_to_text(out, j); prt_printf(out, "replay done:\t\t%i\n", diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index b0f4dd491e1205d28c6af528fb59696cdbc4dc9c..04a1e79a5ed392cd8ebaac922a2516b374a6d094 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -683,10 +683,7 @@ static void journal_entry_dev_usage_to_text(struct printbuf *out, struct bch_fs prt_printf(out, "dev=%u", le32_to_cpu(u->dev)); for (i = 
0; i < nr_types; i++) { - if (i < BCH_DATA_NR) - prt_printf(out, " %s", bch2_data_types[i]); - else - prt_printf(out, " (unknown data type %u)", i); + bch2_prt_data_type(out, i); prt_printf(out, ": buckets=%llu sectors=%llu fragmented=%llu", le64_to_cpu(u->d[i].buckets), le64_to_cpu(u->d[i].sectors), diff --git a/fs/bcachefs/logged_ops_format.h b/fs/bcachefs/logged_ops_format.h new file mode 100644 index 0000000000000000000000000000000000000000..6a4bf7129dba236c7d3b456f901e38fd9cda57c0 --- /dev/null +++ b/fs/bcachefs/logged_ops_format.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_LOGGED_OPS_FORMAT_H +#define _BCACHEFS_LOGGED_OPS_FORMAT_H + +struct bch_logged_op_truncate { + struct bch_val v; + __le32 subvol; + __le32 pad; + __le64 inum; + __le64 new_i_size; +}; + +enum logged_op_finsert_state { + LOGGED_OP_FINSERT_start, + LOGGED_OP_FINSERT_shift_extents, + LOGGED_OP_FINSERT_finish, +}; + +struct bch_logged_op_finsert { + struct bch_val v; + __u8 state; + __u8 pad[3]; + __le32 subvol; + __le64 inum; + __le64 dst_offset; + __le64 src_offset; + __le64 pos; +}; + +#endif /* _BCACHEFS_LOGGED_OPS_FORMAT_H */ diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 7a33319dcd168001594f6532bafe0caf92f83c22..bf68ea49447b95055a4f6a1e6e7c6a7e373aebc5 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -6,9 +6,11 @@ #include "backpointers.h" #include "bkey_buf.h" #include "btree_gc.h" +#include "btree_io.h" #include "btree_update.h" #include "btree_update_interior.h" #include "btree_write_buffer.h" +#include "compress.h" #include "disk_groups.h" #include "ec.h" #include "errcode.h" @@ -34,12 +36,46 @@ const char * const bch2_data_ops_strs[] = { NULL }; -static void trace_move_extent2(struct bch_fs *c, struct bkey_s_c k) +static void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c, + struct bch_io_opts *io_opts, + struct data_update_opts *data_opts) +{ + printbuf_tabstop_push(out, 20); + prt_str(out, "rewrite 
ptrs:"); + prt_tab(out); + bch2_prt_u64_base2(out, data_opts->rewrite_ptrs); + prt_newline(out); + + prt_str(out, "kill ptrs: "); + prt_tab(out); + bch2_prt_u64_base2(out, data_opts->kill_ptrs); + prt_newline(out); + + prt_str(out, "target: "); + prt_tab(out); + bch2_target_to_text(out, c, data_opts->target); + prt_newline(out); + + prt_str(out, "compression: "); + prt_tab(out); + bch2_compression_opt_to_text(out, background_compression(*io_opts)); + prt_newline(out); + + prt_str(out, "extra replicas: "); + prt_tab(out); + prt_u64(out, data_opts->extra_replicas); +} + +static void trace_move_extent2(struct bch_fs *c, struct bkey_s_c k, + struct bch_io_opts *io_opts, + struct data_update_opts *data_opts) { if (trace_move_extent_enabled()) { struct printbuf buf = PRINTBUF; bch2_bkey_val_to_text(&buf, c, k); + prt_newline(&buf); + bch2_data_update_opts_to_text(&buf, c, io_opts, data_opts); trace_move_extent(c, buf.buf); printbuf_exit(&buf); } @@ -111,6 +147,15 @@ static void move_write(struct moving_io *io) return; } + if (trace_move_extent_write_enabled()) { + struct bch_fs *c = io->write.op.c; + struct printbuf buf = PRINTBUF; + + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(io->write.k.k)); + trace_move_extent_write(c, buf.buf); + printbuf_exit(&buf); + } + closure_get(&io->write.ctxt->cl); atomic_add(io->write_sectors, &io->write.ctxt->write_sectors); atomic_inc(&io->write.ctxt->write_ios); @@ -241,9 +286,10 @@ int bch2_move_extent(struct moving_context *ctxt, unsigned sectors = k.k->size, pages; int ret = -ENOMEM; + trace_move_extent2(c, k, &io_opts, &data_opts); + if (ctxt->stats) ctxt->stats->pos = BBPOS(iter->btree_id, iter->pos); - trace_move_extent2(c, k); bch2_data_update_opts_normalize(k, &data_opts); @@ -759,6 +805,8 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, if (!b) goto next; + unsigned sectors = btree_ptr_sectors_written(&b->key); + ret = bch2_btree_node_rewrite(trans, &iter, b, 0); bch2_trans_iter_exit(trans, &iter); @@ -768,11 +816,10 
@@ int bch2_evacuate_bucket(struct moving_context *ctxt, goto err; if (ctxt->rate) - bch2_ratelimit_increment(ctxt->rate, - c->opts.btree_node_size >> 9); + bch2_ratelimit_increment(ctxt->rate, sectors); if (ctxt->stats) { - atomic64_add(c->opts.btree_node_size >> 9, &ctxt->stats->sectors_seen); - atomic64_add(c->opts.btree_node_size >> 9, &ctxt->stats->sectors_moved); + atomic64_add(sectors, &ctxt->stats->sectors_seen); + atomic64_add(sectors, &ctxt->stats->sectors_moved); } } next: @@ -1083,9 +1130,9 @@ int bch2_data_job(struct bch_fs *c, void bch2_move_stats_to_text(struct printbuf *out, struct bch_move_stats *stats) { - prt_printf(out, "%s: data type=%s pos=", - stats->name, - bch2_data_types[stats->data_type]); + prt_printf(out, "%s: data type==", stats->name); + bch2_prt_data_type(out, stats->data_type); + prt_str(out, " pos="); bch2_bbpos_to_text(out, stats->pos); prt_newline(out); printbuf_indent_add(out, 2); diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c index 8e6f230eac38155bf5d048367d6ebde35a4a15bd..b1ed0b9a20d35d61491ce0cff28b4bb2c7be42c3 100644 --- a/fs/bcachefs/opts.c +++ b/fs/bcachefs/opts.c @@ -52,7 +52,7 @@ const char * const bch2_csum_opts[] = { NULL }; -const char * const bch2_compression_types[] = { +const char * const __bch2_compression_types[] = { BCH_COMPRESSION_TYPES() NULL }; @@ -72,7 +72,7 @@ const char * const bch2_str_hash_opts[] = { NULL }; -const char * const bch2_data_types[] = { +const char * const __bch2_data_types[] = { BCH_DATA_TYPES() NULL }; diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index 93a24fef42148488cdddb391cd291dd0e0168063..9a4b7faa376503993f1c2da8f8d1e5963ef6ca5a 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -18,11 +18,11 @@ extern const char * const bch2_sb_compat[]; extern const char * const __bch2_btree_ids[]; extern const char * const bch2_csum_types[]; extern const char * const bch2_csum_opts[]; -extern const char * const bch2_compression_types[]; +extern const char * const 
__bch2_compression_types[]; extern const char * const bch2_compression_opts[]; extern const char * const bch2_str_hash_types[]; extern const char * const bch2_str_hash_opts[]; -extern const char * const bch2_data_types[]; +extern const char * const __bch2_data_types[]; extern const char * const bch2_member_states[]; extern const char * const bch2_jset_entry_types[]; extern const char * const bch2_fs_usage_types[]; @@ -564,6 +564,11 @@ struct bch_io_opts { #undef x }; +static inline unsigned background_compression(struct bch_io_opts opts) +{ + return opts.background_compression ?: opts.compression; +} + struct bch_io_opts bch2_opts_to_inode_opts(struct bch_opts); bool bch2_opt_is_inode_opt(enum bch_opt_id); diff --git a/fs/bcachefs/quota_format.h b/fs/bcachefs/quota_format.h new file mode 100644 index 0000000000000000000000000000000000000000..dc34347ef6c74a9933bb069915b8902cfdb190a2 --- /dev/null +++ b/fs/bcachefs/quota_format.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_QUOTA_FORMAT_H +#define _BCACHEFS_QUOTA_FORMAT_H + +/* KEY_TYPE_quota: */ + +enum quota_types { + QTYP_USR = 0, + QTYP_GRP = 1, + QTYP_PRJ = 2, + QTYP_NR = 3, +}; + +enum quota_counters { + Q_SPC = 0, + Q_INO = 1, + Q_COUNTERS = 2, +}; + +struct bch_quota_counter { + __le64 hardlimit; + __le64 softlimit; +}; + +struct bch_quota { + struct bch_val v; + struct bch_quota_counter c[Q_COUNTERS]; +} __packed __aligned(8); + +/* BCH_SB_FIELD_quota: */ + +struct bch_sb_quota_counter { + __le32 timelimit; + __le32 warnlimit; +}; + +struct bch_sb_quota_type { + __le64 flags; + struct bch_sb_quota_counter c[Q_COUNTERS]; +}; + +struct bch_sb_field_quota { + struct bch_sb_field field; + struct bch_sb_quota_type q[QTYP_NR]; +} __packed __aligned(8); + +#endif /* _BCACHEFS_QUOTA_FORMAT_H */ diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index 95f46cb3b5bdfd820e845a8cceda2b3c2fb67cf4..22d1017aa49b975756905a9a69ce8bcd82416ca3 100644 --- a/fs/bcachefs/rebalance.c +++ 
b/fs/bcachefs/rebalance.c @@ -177,8 +177,7 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans, prt_str(&buf, "target="); bch2_target_to_text(&buf, c, r->target); prt_str(&buf, " compression="); - struct bch_compression_opt opt = __bch2_compression_decode(r->compression); - prt_str(&buf, bch2_compression_opts[opt.type]); + bch2_compression_opt_to_text(&buf, r->compression); prt_str(&buf, " "); bch2_bkey_val_to_text(&buf, c, k); @@ -254,13 +253,12 @@ static bool rebalance_pred(struct bch_fs *c, void *arg, if (k.k->p.inode) { target = io_opts->background_target; - compression = io_opts->background_compression ?: io_opts->compression; + compression = background_compression(*io_opts); } else { const struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k); target = r ? r->target : io_opts->background_target; - compression = r ? r->compression : - (io_opts->background_compression ?: io_opts->compression); + compression = r ? r->compression : background_compression(*io_opts); } data_opts->rewrite_ptrs = bch2_bkey_ptrs_need_rebalance(c, k, target, compression); @@ -371,6 +369,7 @@ static int do_rebalance(struct moving_context *ctxt) !kthread_should_stop() && !atomic64_read(&r->work_stats.sectors_seen) && !atomic64_read(&r->scan_stats.sectors_seen)) { + bch2_moving_ctxt_flush_all(ctxt); bch2_trans_unlock_long(trans); rebalance_wait(c); } @@ -385,7 +384,6 @@ static int bch2_rebalance_thread(void *arg) struct bch_fs *c = arg; struct bch_fs_rebalance *r = &c->rebalance; struct moving_context ctxt; - int ret; set_freezable(); @@ -393,8 +391,7 @@ static int bch2_rebalance_thread(void *arg) writepoint_ptr(&c->rebalance_write_point), true); - while (!kthread_should_stop() && - !(ret = do_rebalance(&ctxt))) + while (!kthread_should_stop() && !do_rebalance(&ctxt)) ; bch2_moving_ctxt_exit(&ctxt); diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 725214605a050996196c28a9132f8fe247e76d28..9127d0e3ca2f6a3fd44e076b42f01ee6f7736427 100644 --- 
a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -280,7 +280,7 @@ static int journal_replay_entry_early(struct bch_fs *c, le64_to_cpu(u->v); break; case BCH_FS_USAGE_inodes: - c->usage_base->nr_inodes = le64_to_cpu(u->v); + c->usage_base->b.nr_inodes = le64_to_cpu(u->v); break; case BCH_FS_USAGE_key_version: atomic64_set(&c->key_version, diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index faa5d367005874f8838128822c9584f9bdf48b33..c47c66c2b394dc8df391fa3adf8bfea03e1e447e 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -292,10 +292,10 @@ static inline void check_indirect_extent_deleting(struct bkey_s new, unsigned *f } } -int bch2_trans_mark_reflink_v(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_s new, - unsigned flags) +int bch2_trigger_reflink_v(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c old, struct bkey_s new, + unsigned flags) { if ((flags & BTREE_TRIGGER_TRANSACTIONAL) && (flags & BTREE_TRIGGER_INSERT)) @@ -324,7 +324,7 @@ void bch2_indirect_inline_data_to_text(struct printbuf *out, min(datalen, 32U), d.v->data); } -int bch2_trans_mark_indirect_inline_data(struct btree_trans *trans, +int bch2_trigger_indirect_inline_data(struct btree_trans *trans, enum btree_id btree_id, unsigned level, struct bkey_s_c old, struct bkey_s new, unsigned flags) @@ -486,6 +486,13 @@ s64 bch2_remap_range(struct bch_fs *c, bch2_btree_iter_set_snapshot(&dst_iter, dst_snapshot); + if (dst_inum.inum < src_inum.inum) { + /* Avoid some lock cycle transaction restarts */ + ret = bch2_btree_iter_traverse(&dst_iter); + if (ret) + continue; + } + dst_done = dst_iter.pos.offset - dst_start.offset; src_want = POS(src_start.inode, src_start.offset + dst_done); bch2_btree_iter_set_pos(&src_iter, src_want); @@ -538,9 +545,7 @@ s64 bch2_remap_range(struct bch_fs *c, min(src_k.k->p.offset - src_want.offset, dst_end.offset - dst_iter.pos.offset)); - ret = 
bch2_bkey_set_needs_rebalance(c, new_dst.k, - opts.background_target, - opts.background_compression) ?: + ret = bch2_bkey_set_needs_rebalance(c, new_dst.k, &opts) ?: bch2_extent_update(trans, dst_inum, &dst_iter, new_dst.k, &disk_res, new_i_size, i_sectors_delta, diff --git a/fs/bcachefs/reflink.h b/fs/bcachefs/reflink.h index 8ee778ec0022a327145eb91ebefbcb38cc1240bf..4d8867289717bf6cf46f05b0c58e3adcc42efae7 100644 --- a/fs/bcachefs/reflink.h +++ b/fs/bcachefs/reflink.h @@ -24,14 +24,14 @@ int bch2_reflink_v_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_trans_mark_reflink_v(struct btree_trans *, enum btree_id, unsigned, +int bch2_trigger_reflink_v(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, unsigned); #define bch2_bkey_ops_reflink_v ((struct bkey_ops) { \ .key_invalid = bch2_reflink_v_invalid, \ .val_to_text = bch2_reflink_v_to_text, \ .swab = bch2_ptr_swab, \ - .trigger = bch2_trans_mark_reflink_v, \ + .trigger = bch2_trigger_reflink_v, \ .min_val_size = 8, \ }) @@ -39,7 +39,7 @@ int bch2_indirect_inline_data_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_indirect_inline_data_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_trans_mark_indirect_inline_data(struct btree_trans *, +int bch2_trigger_indirect_inline_data(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, unsigned); @@ -47,7 +47,7 @@ int bch2_trans_mark_indirect_inline_data(struct btree_trans *, #define bch2_bkey_ops_indirect_inline_data ((struct bkey_ops) { \ .key_invalid = bch2_indirect_inline_data_invalid, \ .val_to_text = bch2_indirect_inline_data_to_text, \ - .trigger = bch2_trans_mark_indirect_inline_data, \ + .trigger = bch2_trigger_indirect_inline_data, \ .min_val_size = 8, \ }) diff --git a/fs/bcachefs/reflink_format.h 
b/fs/bcachefs/reflink_format.h new file mode 100644 index 0000000000000000000000000000000000000000..6772eebb1fc6685faf4729ebbd1b726e1b4d922e --- /dev/null +++ b/fs/bcachefs/reflink_format.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_REFLINK_FORMAT_H +#define _BCACHEFS_REFLINK_FORMAT_H + +struct bch_reflink_p { + struct bch_val v; + __le64 idx; + /* + * A reflink pointer might point to an indirect extent which is then + * later split (by copygc or rebalance). If we only pointed to part of + * the original indirect extent, and then one of the fragments is + * outside the range we point to, we'd leak a refcount: so when creating + * reflink pointers, we need to store pad values to remember the full + * range we were taking a reference on. + */ + __le32 front_pad; + __le32 back_pad; +} __packed __aligned(8); + +struct bch_reflink_v { + struct bch_val v; + __le64 refcount; + union bch_extent_entry start[0]; + __u64 _data[]; +} __packed __aligned(8); + +struct bch_indirect_inline_data { + struct bch_val v; + __le64 refcount; + u8 data[]; +}; + +#endif /* _BCACHEFS_REFLINK_FORMAT_H */ diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c index 92ba56ef1fc89690656e9625871ecd7ee38b5f9b..cc2672c120312c39f82e9a1a9afe0ed959b15dba 100644 --- a/fs/bcachefs/replicas.c +++ b/fs/bcachefs/replicas.c @@ -9,6 +9,12 @@ static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *, struct bch_replicas_cpu *); +/* Some (buggy!) 
compilers don't allow memcmp to be passed as a pointer */ +static int bch2_memcmp(const void *l, const void *r, size_t size) +{ + return memcmp(l, r, size); +} + /* Replicas tracking - in memory: */ static void verify_replicas_entry(struct bch_replicas_entry_v1 *e) @@ -33,21 +39,16 @@ void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *e) static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r) { - eytzinger0_sort(r->entries, r->nr, r->entry_size, memcmp, NULL); + eytzinger0_sort(r->entries, r->nr, r->entry_size, bch2_memcmp, NULL); } static void bch2_replicas_entry_v0_to_text(struct printbuf *out, struct bch_replicas_entry_v0 *e) { - unsigned i; - - if (e->data_type < BCH_DATA_NR) - prt_printf(out, "%s", bch2_data_types[e->data_type]); - else - prt_printf(out, "(invalid data type %u)", e->data_type); + bch2_prt_data_type(out, e->data_type); prt_printf(out, ": %u [", e->nr_devs); - for (i = 0; i < e->nr_devs; i++) + for (unsigned i = 0; i < e->nr_devs; i++) prt_printf(out, i ? " %u" : "%u", e->devs[i]); prt_printf(out, "]"); } @@ -55,15 +56,10 @@ static void bch2_replicas_entry_v0_to_text(struct printbuf *out, void bch2_replicas_entry_to_text(struct printbuf *out, struct bch_replicas_entry_v1 *e) { - unsigned i; - - if (e->data_type < BCH_DATA_NR) - prt_printf(out, "%s", bch2_data_types[e->data_type]); - else - prt_printf(out, "(invalid data type %u)", e->data_type); + bch2_prt_data_type(out, e->data_type); prt_printf(out, ": %u/%u [", e->nr_required, e->nr_devs); - for (i = 0; i < e->nr_devs; i++) + for (unsigned i = 0; i < e->nr_devs; i++) prt_printf(out, i ? 
" %u" : "%u", e->devs[i]); prt_printf(out, "]"); } @@ -831,7 +827,7 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r, sort_cmp_size(cpu_r->entries, cpu_r->nr, cpu_r->entry_size, - memcmp, NULL); + bch2_memcmp, NULL); for (i = 0; i < cpu_r->nr; i++) { struct bch_replicas_entry_v1 *e = diff --git a/fs/bcachefs/sb-clean.c b/fs/bcachefs/sb-clean.c index 9632f36f5f318134065cfdbae613b422cce98f6a..b6bf0ebe7e84046a5d08ade7d34bae9ae0bff3a5 100644 --- a/fs/bcachefs/sb-clean.c +++ b/fs/bcachefs/sb-clean.c @@ -207,7 +207,7 @@ void bch2_journal_super_entries_add_common(struct bch_fs *c, u->entry.type = BCH_JSET_ENTRY_usage; u->entry.btree_id = BCH_FS_USAGE_inodes; - u->v = cpu_to_le64(c->usage_base->nr_inodes); + u->v = cpu_to_le64(c->usage_base->b.nr_inodes); } { diff --git a/fs/bcachefs/counters.c b/fs/bcachefs/sb-counters.c similarity index 99% rename from fs/bcachefs/counters.c rename to fs/bcachefs/sb-counters.c index 02a996e06a64e3d10483f7fcbffc0de66428f9ed..7dc898761bb3125a79c82a5de17de0807920d98d 100644 --- a/fs/bcachefs/counters.c +++ b/fs/bcachefs/sb-counters.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" #include "super-io.h" -#include "counters.h" +#include "sb-counters.h" /* BCH_SB_FIELD_counters */ diff --git a/fs/bcachefs/counters.h b/fs/bcachefs/sb-counters.h similarity index 77% rename from fs/bcachefs/counters.h rename to fs/bcachefs/sb-counters.h index 4778aa19bf346459c5ca252e6b75279503867f43..81f8aec9fcb1cedf43143f269fdfc1b6fb39e441 100644 --- a/fs/bcachefs/counters.h +++ b/fs/bcachefs/sb-counters.h @@ -1,11 +1,10 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _BCACHEFS_COUNTERS_H -#define _BCACHEFS_COUNTERS_H +#ifndef _BCACHEFS_SB_COUNTERS_H +#define _BCACHEFS_SB_COUNTERS_H #include "bcachefs.h" #include "super-io.h" - int bch2_sb_counters_to_cpu(struct bch_fs *); int bch2_sb_counters_from_cpu(struct bch_fs *); @@ -14,4 +13,4 @@ int bch2_fs_counters_init(struct bch_fs *); extern const struct 
bch_sb_field_ops bch_sb_field_ops_counters; -#endif // _BCACHEFS_COUNTERS_H +#endif // _BCACHEFS_SB_COUNTERS_H diff --git a/fs/bcachefs/sb-counters_format.h b/fs/bcachefs/sb-counters_format.h new file mode 100644 index 0000000000000000000000000000000000000000..62ea478215d08e77a5a7708a2a7dfc39f9236355 --- /dev/null +++ b/fs/bcachefs/sb-counters_format.h @@ -0,0 +1,98 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_SB_COUNTERS_FORMAT_H +#define _BCACHEFS_SB_COUNTERS_FORMAT_H + +#define BCH_PERSISTENT_COUNTERS() \ + x(io_read, 0) \ + x(io_write, 1) \ + x(io_move, 2) \ + x(bucket_invalidate, 3) \ + x(bucket_discard, 4) \ + x(bucket_alloc, 5) \ + x(bucket_alloc_fail, 6) \ + x(btree_cache_scan, 7) \ + x(btree_cache_reap, 8) \ + x(btree_cache_cannibalize, 9) \ + x(btree_cache_cannibalize_lock, 10) \ + x(btree_cache_cannibalize_lock_fail, 11) \ + x(btree_cache_cannibalize_unlock, 12) \ + x(btree_node_write, 13) \ + x(btree_node_read, 14) \ + x(btree_node_compact, 15) \ + x(btree_node_merge, 16) \ + x(btree_node_split, 17) \ + x(btree_node_rewrite, 18) \ + x(btree_node_alloc, 19) \ + x(btree_node_free, 20) \ + x(btree_node_set_root, 21) \ + x(btree_path_relock_fail, 22) \ + x(btree_path_upgrade_fail, 23) \ + x(btree_reserve_get_fail, 24) \ + x(journal_entry_full, 25) \ + x(journal_full, 26) \ + x(journal_reclaim_finish, 27) \ + x(journal_reclaim_start, 28) \ + x(journal_write, 29) \ + x(read_promote, 30) \ + x(read_bounce, 31) \ + x(read_split, 33) \ + x(read_retry, 32) \ + x(read_reuse_race, 34) \ + x(move_extent_read, 35) \ + x(move_extent_write, 36) \ + x(move_extent_finish, 37) \ + x(move_extent_fail, 38) \ + x(move_extent_start_fail, 39) \ + x(copygc, 40) \ + x(copygc_wait, 41) \ + x(gc_gens_end, 42) \ + x(gc_gens_start, 43) \ + x(trans_blocked_journal_reclaim, 44) \ + x(trans_restart_btree_node_reused, 45) \ + x(trans_restart_btree_node_split, 46) \ + x(trans_restart_fault_inject, 47) \ + x(trans_restart_iter_upgrade, 48) \ + 
x(trans_restart_journal_preres_get, 49) \ + x(trans_restart_journal_reclaim, 50) \ + x(trans_restart_journal_res_get, 51) \ + x(trans_restart_key_cache_key_realloced, 52) \ + x(trans_restart_key_cache_raced, 53) \ + x(trans_restart_mark_replicas, 54) \ + x(trans_restart_mem_realloced, 55) \ + x(trans_restart_memory_allocation_failure, 56) \ + x(trans_restart_relock, 57) \ + x(trans_restart_relock_after_fill, 58) \ + x(trans_restart_relock_key_cache_fill, 59) \ + x(trans_restart_relock_next_node, 60) \ + x(trans_restart_relock_parent_for_fill, 61) \ + x(trans_restart_relock_path, 62) \ + x(trans_restart_relock_path_intent, 63) \ + x(trans_restart_too_many_iters, 64) \ + x(trans_restart_traverse, 65) \ + x(trans_restart_upgrade, 66) \ + x(trans_restart_would_deadlock, 67) \ + x(trans_restart_would_deadlock_write, 68) \ + x(trans_restart_injected, 69) \ + x(trans_restart_key_cache_upgrade, 70) \ + x(trans_traverse_all, 71) \ + x(transaction_commit, 72) \ + x(write_super, 73) \ + x(trans_restart_would_deadlock_recursion_limit, 74) \ + x(trans_restart_write_buffer_flush, 75) \ + x(trans_restart_split_race, 76) \ + x(write_buffer_flush_slowpath, 77) \ + x(write_buffer_flush_sync, 78) + +enum bch_persistent_counters { +#define x(t, n, ...) 
BCH_COUNTER_##t, + BCH_PERSISTENT_COUNTERS() +#undef x + BCH_COUNTER_NR +}; + +struct bch_sb_field_counters { + struct bch_sb_field field; + __le64 d[]; +}; + +#endif /* _BCACHEFS_SB_COUNTERS_FORMAT_H */ diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c index a44a238bf8b5550023226844734424b1211c812a..a45354d2acde9f3ad0b149247c8ff4c7c869fb15 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c @@ -251,7 +251,7 @@ static void member_to_text(struct printbuf *out, prt_printf(out, "Data allowed:"); prt_tab(out); if (BCH_MEMBER_DATA_ALLOWED(&m)) - prt_bitflags(out, bch2_data_types, BCH_MEMBER_DATA_ALLOWED(&m)); + prt_bitflags(out, __bch2_data_types, BCH_MEMBER_DATA_ALLOWED(&m)); else prt_printf(out, "(none)"); prt_newline(out); @@ -259,7 +259,7 @@ static void member_to_text(struct printbuf *out, prt_printf(out, "Has data:"); prt_tab(out); if (data_have) - prt_bitflags(out, bch2_data_types, data_have); + prt_bitflags(out, __bch2_data_types, data_have); else prt_printf(out, "(none)"); prt_newline(out); diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index 56af937523ff2a8deda0a5168f45a67533a57da5..45f67e8b29eb67f188e5cfb32aa39e0b1ad1d625 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -1053,6 +1053,8 @@ static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree, n->v.subvol = cpu_to_le32(snapshot_subvols[i]); n->v.tree = cpu_to_le32(tree); n->v.depth = cpu_to_le32(depth); + n->v.btime.lo = cpu_to_le64(bch2_current_time(c)); + n->v.btime.hi = 0; for (j = 0; j < ARRAY_SIZE(n->v.skip); j++) n->v.skip[j] = cpu_to_le32(bch2_snapshot_skiplist_get(c, parent)); @@ -1681,5 +1683,5 @@ int bch2_snapshots_read(struct bch_fs *c) void bch2_fs_snapshots_exit(struct bch_fs *c) { - kfree(rcu_dereference_protected(c->snapshots, true)); + kvfree(rcu_dereference_protected(c->snapshots, true)); } diff --git a/fs/bcachefs/snapshot_format.h b/fs/bcachefs/snapshot_format.h new file mode 100644 index 
0000000000000000000000000000000000000000..aabcd3a74cd95d6712f7f9f257cc8ee2caac4958 --- /dev/null +++ b/fs/bcachefs/snapshot_format.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_SNAPSHOT_FORMAT_H +#define _BCACHEFS_SNAPSHOT_FORMAT_H + +struct bch_snapshot { + struct bch_val v; + __le32 flags; + __le32 parent; + __le32 children[2]; + __le32 subvol; + /* corresponds to a bch_snapshot_tree in BTREE_ID_snapshot_trees */ + __le32 tree; + __le32 depth; + __le32 skip[3]; + bch_le128 btime; +}; + +LE32_BITMASK(BCH_SNAPSHOT_DELETED, struct bch_snapshot, flags, 0, 1) + +/* True if a subvolume points to this snapshot node: */ +LE32_BITMASK(BCH_SNAPSHOT_SUBVOL, struct bch_snapshot, flags, 1, 2) + +/* + * Snapshot trees: + * + * The snapshot_trees btree gives us a persistent identifier for each tree of + * bch_snapshot nodes, and allows us to record and easily find the root/master + * subvolume that other snapshots were created from: + */ +struct bch_snapshot_tree { + struct bch_val v; + __le32 master_subvol; + __le32 root_snapshot; +}; + +#endif /* _BCACHEFS_SNAPSHOT_FORMAT_H */ diff --git a/fs/bcachefs/subvolume_format.h b/fs/bcachefs/subvolume_format.h new file mode 100644 index 0000000000000000000000000000000000000000..af79134b07d6ad304e7af22b838d9709e777a41b --- /dev/null +++ b/fs/bcachefs/subvolume_format.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_SUBVOLUME_FORMAT_H +#define _BCACHEFS_SUBVOLUME_FORMAT_H + +#define SUBVOL_POS_MIN POS(0, 1) +#define SUBVOL_POS_MAX POS(0, S32_MAX) +#define BCACHEFS_ROOT_SUBVOL 1 + +struct bch_subvolume { + struct bch_val v; + __le32 flags; + __le32 snapshot; + __le64 inode; + /* + * Snapshot subvolumes form a tree, separate from the snapshot nodes + * tree - if this subvolume is a snapshot, this is the ID of the + * subvolume it was created from: + * + * This is _not_ necessarily the subvolume of the directory containing + * this subvolume: + */ + __le32 parent; + __le32 pad; +
bch_le128 otime; +}; + +LE32_BITMASK(BCH_SUBVOLUME_RO, struct bch_subvolume, flags, 0, 1) +/* + * We need to know whether a subvolume is a snapshot so we can know whether we + * can delete it (or whether it should just be rm -rf'd) + */ +LE32_BITMASK(BCH_SUBVOLUME_SNAP, struct bch_subvolume, flags, 1, 2) +LE32_BITMASK(BCH_SUBVOLUME_UNLINKED, struct bch_subvolume, flags, 2, 3) + +#endif /* _BCACHEFS_SUBVOLUME_FORMAT_H */ diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 6d3db5cce5f6ac9e315500c14fbb5e1d97ea8098..d60c7d27a0477cb0de116675671d5c888d8f1c86 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -2,7 +2,6 @@ #include "bcachefs.h" #include "checksum.h" -#include "counters.h" #include "disk_groups.h" #include "ec.h" #include "error.h" @@ -13,6 +12,7 @@ #include "replicas.h" #include "quota.h" #include "sb-clean.h" +#include "sb-counters.h" #include "sb-downgrade.h" #include "sb-errors.h" #include "sb-members.h" @@ -1321,7 +1321,9 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb, prt_printf(out, "Superblock size:"); prt_tab(out); - prt_printf(out, "%zu", vstruct_bytes(sb)); + prt_units_u64(out, vstruct_bytes(sb)); + prt_str(out, "/"); + prt_units_u64(out, 512ULL << sb->layout.sb_max_size_bits); prt_newline(out); prt_printf(out, "Clean:"); diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 9dbc35940197f1c55c1bc48746bc23a3983ac203..b9911402b1753baa986a1673339c4454eba87431 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -23,7 +23,6 @@ #include "checksum.h" #include "clock.h" #include "compress.h" -#include "counters.h" #include "debug.h" #include "disk_groups.h" #include "ec.h" @@ -49,6 +48,7 @@ #include "recovery.h" #include "replicas.h" #include "sb-clean.h" +#include "sb-counters.h" #include "sb-errors.h" #include "sb-members.h" #include "snapshot.h" @@ -883,7 +883,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) !(c->pcpu = alloc_percpu(struct bch_fs_pcpu)) 
|| !(c->online_reserved = alloc_percpu(u64)) || mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1, - btree_bytes(c)) || + c->opts.btree_node_size) || mempool_init_kmalloc_pool(&c->large_bkey_pool, 1, 2048) || !(c->unused_inode_hints = kcalloc(1U << c->inode_shard_bits, sizeof(u64), GFP_KERNEL))) { @@ -1386,8 +1386,8 @@ static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb) prt_bdevname(&name, ca->disk_sb.bdev); if (c->sb.nr_devices == 1) - strlcpy(c->name, name.buf, sizeof(c->name)); - strlcpy(ca->name, name.buf, sizeof(ca->name)); + strscpy(c->name, name.buf, sizeof(c->name)); + strscpy(ca->name, name.buf, sizeof(ca->name)); printbuf_exit(&name); @@ -1625,7 +1625,7 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) if (data) { struct printbuf data_has = PRINTBUF; - prt_bitflags(&data_has, bch2_data_types, data); + prt_bitflags(&data_has, __bch2_data_types, data); bch_err(ca, "Remove failed, still has data (%s)", data_has.buf); printbuf_exit(&data_has); ret = -EBUSY; diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index 8ed52319ff68d2b93194970b7da51218a579b0dd..cee80c47feea2b27fa7d18fc55a39228db7f0b96 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -21,6 +21,7 @@ #include "btree_gc.h" #include "buckets.h" #include "clock.h" +#include "compress.h" #include "disk_groups.h" #include "ec.h" #include "inode.h" @@ -247,7 +248,7 @@ static size_t bch2_btree_cache_size(struct bch_fs *c) mutex_lock(&c->btree_cache.lock); list_for_each_entry(b, &c->btree_cache.live, list) - ret += btree_bytes(c); + ret += btree_buf_bytes(b); mutex_unlock(&c->btree_cache.lock); return ret; @@ -330,7 +331,7 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c prt_newline(out); for (unsigned i = 0; i < ARRAY_SIZE(s); i++) { - prt_str(out, bch2_compression_types[i]); + bch2_prt_compression_type(out, i); prt_tab(out); prt_human_readable_u64(out, s[i].sectors_compressed << 9); @@ -725,8 +726,10 @@ 
STORE(bch2_fs_opts_dir) bch2_opt_set_sb(c, opt, v); bch2_opt_set_by_id(&c->opts, id, v); - if ((id == Opt_background_target || - id == Opt_background_compression) && v) + if (v && + (id == Opt_background_target || + id == Opt_background_compression || + (id == Opt_compression && !c->opts.background_compression))) bch2_set_rebalance_needs_scan(c, 0); ret = size; @@ -883,7 +886,7 @@ static void dev_io_done_to_text(struct printbuf *out, struct bch_dev *ca) for (i = 1; i < BCH_DATA_NR; i++) prt_printf(out, "%-12s:%12llu\n", - bch2_data_types[i], + bch2_data_type_str(i), percpu_u64_get(&ca->io_done->sectors[rw][i]) << 9); } } @@ -908,7 +911,7 @@ SHOW(bch2_dev) } if (attr == &sysfs_has_data) { - prt_bitflags(out, bch2_data_types, bch2_dev_has_data(c, ca)); + prt_bitflags(out, __bch2_data_types, bch2_dev_has_data(c, ca)); prt_char(out, '\n'); } diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index c94876b3bb06e4d8bf0ba490421ead37d87e5569..293b90d704fb5b48ed39038e793c4d3cbf77b5a8 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -46,7 +46,7 @@ DECLARE_EVENT_CLASS(fs_str, __assign_str(str, str); ), - TP_printk("%d,%d %s", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(str)) + TP_printk("%d,%d\n%s", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(str)) ); DECLARE_EVENT_CLASS(trans_str, @@ -273,28 +273,14 @@ DEFINE_EVENT(bch_fs, journal_full, TP_ARGS(c) ); -DEFINE_EVENT(bch_fs, journal_entry_full, - TP_PROTO(struct bch_fs *c), - TP_ARGS(c) +DEFINE_EVENT(fs_str, journal_entry_full, + TP_PROTO(struct bch_fs *c, const char *str), + TP_ARGS(c, str) ); -TRACE_EVENT(journal_entry_close, - TP_PROTO(struct bch_fs *c, unsigned bytes), - TP_ARGS(c, bytes), - - TP_STRUCT__entry( - __field(dev_t, dev ) - __field(u32, bytes ) - ), - - TP_fast_assign( - __entry->dev = c->dev; - __entry->bytes = bytes; - ), - - TP_printk("%d,%d entry bytes %u", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->bytes) +DEFINE_EVENT(fs_str, journal_entry_close, + 
TP_PROTO(struct bch_fs *c, const char *str), + TP_ARGS(c, str) ); DEFINE_EVENT(bio, journal_write, @@ -542,7 +528,7 @@ TRACE_EVENT(btree_path_relock_fail, __entry->level = path->level; TRACE_BPOS_assign(pos, path->pos); - c = bch2_btree_node_lock_counts(trans, NULL, &path->l[level].b->c, level), + c = bch2_btree_node_lock_counts(trans, NULL, &path->l[level].b->c, level); __entry->self_read_count = c.n[SIX_LOCK_read]; __entry->self_intent_count = c.n[SIX_LOCK_intent]; @@ -827,40 +813,28 @@ TRACE_EVENT(bucket_evacuate, ); DEFINE_EVENT(fs_str, move_extent, - TP_PROTO(struct bch_fs *c, const char *k), - TP_ARGS(c, k) + TP_PROTO(struct bch_fs *c, const char *str), + TP_ARGS(c, str) ); DEFINE_EVENT(fs_str, move_extent_read, - TP_PROTO(struct bch_fs *c, const char *k), - TP_ARGS(c, k) + TP_PROTO(struct bch_fs *c, const char *str), + TP_ARGS(c, str) ); DEFINE_EVENT(fs_str, move_extent_write, - TP_PROTO(struct bch_fs *c, const char *k), - TP_ARGS(c, k) + TP_PROTO(struct bch_fs *c, const char *str), + TP_ARGS(c, str) ); DEFINE_EVENT(fs_str, move_extent_finish, - TP_PROTO(struct bch_fs *c, const char *k), - TP_ARGS(c, k) + TP_PROTO(struct bch_fs *c, const char *str), + TP_ARGS(c, str) ); -TRACE_EVENT(move_extent_fail, - TP_PROTO(struct bch_fs *c, const char *msg), - TP_ARGS(c, msg), - - TP_STRUCT__entry( - __field(dev_t, dev ) - __string(msg, msg ) - ), - - TP_fast_assign( - __entry->dev = c->dev; - __assign_str(msg, msg); - ), - - TP_printk("%d:%d %s", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(msg)) +DEFINE_EVENT(fs_str, move_extent_fail, + TP_PROTO(struct bch_fs *c, const char *str), + TP_ARGS(c, str) ); DEFINE_EVENT(fs_str, move_extent_start_fail, @@ -1039,7 +1013,7 @@ TRACE_EVENT(trans_restart_split_race, __entry->level = b->c.level; __entry->written = b->written; __entry->blocks = btree_blocks(trans->c); - __entry->u64s_remaining = bch_btree_keys_u64s_remaining(trans->c, b); + __entry->u64s_remaining = bch2_btree_keys_u64s_remaining(b); ), TP_printk("%s %pS 
l=%u written %u/%u u64s remaining %u", @@ -1146,8 +1120,6 @@ DEFINE_EVENT(transaction_restart_iter, trans_restart_btree_node_split, TP_ARGS(trans, caller_ip, path) ); -struct get_locks_fail; - TRACE_EVENT(trans_restart_upgrade, TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, @@ -1195,11 +1167,9 @@ TRACE_EVENT(trans_restart_upgrade, __entry->node_seq) ); -DEFINE_EVENT(transaction_restart_iter, trans_restart_relock, - TP_PROTO(struct btree_trans *trans, - unsigned long caller_ip, - struct btree_path *path), - TP_ARGS(trans, caller_ip, path) +DEFINE_EVENT(trans_str, trans_restart_relock, + TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, const char *str), + TP_ARGS(trans, caller_ip, str) ); DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_next_node, diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index c2ef7cddaa4fcb0e9de9df263aadd019cc7a4965..a135136adeee355cb8854482e85b0c85e6c1b8f8 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -241,12 +241,17 @@ bool bch2_is_zero(const void *_p, size_t n) return true; } -void bch2_prt_u64_binary(struct printbuf *out, u64 v, unsigned nr_bits) +void bch2_prt_u64_base2_nbits(struct printbuf *out, u64 v, unsigned nr_bits) { while (nr_bits) prt_char(out, '0' + ((v >> --nr_bits) & 1)); } +void bch2_prt_u64_base2(struct printbuf *out, u64 v) +{ + bch2_prt_u64_base2_nbits(out, v, fls64(v) ?: 1); +} + void bch2_print_string_as_lines(const char *prefix, const char *lines) { const char *p; @@ -1186,7 +1191,9 @@ int bch2_split_devs(const char *_dev_name, darray_str *ret) { darray_init(ret); - char *dev_name = kstrdup(_dev_name, GFP_KERNEL), *s = dev_name; + char *dev_name, *s, *orig; + + dev_name = orig = kstrdup(_dev_name, GFP_KERNEL); if (!dev_name) return -ENOMEM; @@ -1201,10 +1208,10 @@ int bch2_split_devs(const char *_dev_name, darray_str *ret) } } - kfree(dev_name); + kfree(orig); return 0; err: bch2_darray_str_exit(ret); - kfree(dev_name); + kfree(orig); return -ENOMEM; } diff --git 
a/fs/bcachefs/util.h b/fs/bcachefs/util.h index c75fc31915d3936d8c0a26949915534aac482b3a..df67bf55fe2bc2d74265eb8a52fe6d22fca2fd2f 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -342,7 +342,8 @@ bool bch2_is_zero(const void *, size_t); u64 bch2_read_flag_list(char *, const char * const[]); -void bch2_prt_u64_binary(struct printbuf *, u64, unsigned); +void bch2_prt_u64_base2_nbits(struct printbuf *, u64, unsigned); +void bch2_prt_u64_base2(struct printbuf *, u64); void bch2_print_string_as_lines(const char *prefix, const char *lines); diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index 5a1858fb9879afd1c70c3d5a64883315090d6dbe..9c0d2316031b1beceda4e1b68dcda4e34184a89e 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -590,8 +590,9 @@ err: mutex_unlock(&inode->ei_update_lock); if (value && - (opt_id == Opt_background_compression || - opt_id == Opt_background_target)) + (opt_id == Opt_background_target || + opt_id == Opt_background_compression || + (opt_id == Opt_compression && !inode_opt_get(c, &inode->ei_inode, background_compression)))) bch2_set_rebalance_needs_scan(c, inode->ei_inode.bi_inum); return bch2_err_class(ret); diff --git a/fs/bcachefs/xattr_format.h b/fs/bcachefs/xattr_format.h new file mode 100644 index 0000000000000000000000000000000000000000..e9f810539552ef6d47024d6a8c21967417ebf6a4 --- /dev/null +++ b/fs/bcachefs/xattr_format.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_XATTR_FORMAT_H +#define _BCACHEFS_XATTR_FORMAT_H + +#define KEY_TYPE_XATTR_INDEX_USER 0 +#define KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS 1 +#define KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT 2 +#define KEY_TYPE_XATTR_INDEX_TRUSTED 3 +#define KEY_TYPE_XATTR_INDEX_SECURITY 4 + +struct bch_xattr { + struct bch_val v; + __u8 x_type; + __u8 x_name_len; + __le16 x_val_len; + __u8 x_name[]; +} __packed __aligned(8); + +#endif /* _BCACHEFS_XATTR_FORMAT_H */ diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 
193168214eeb17fc8a8a9cff3942eb3f68958e1b..68345f73d429aa2d4537ef620a0048e61c4eb7a8 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -141,16 +141,16 @@ static int compression_decompress_bio(struct list_head *ws, } static int compression_decompress(int type, struct list_head *ws, - const u8 *data_in, struct page *dest_page, - unsigned long start_byte, size_t srclen, size_t destlen) + const u8 *data_in, struct page *dest_page, + unsigned long dest_pgoff, size_t srclen, size_t destlen) { switch (type) { case BTRFS_COMPRESS_ZLIB: return zlib_decompress(ws, data_in, dest_page, - start_byte, srclen, destlen); + dest_pgoff, srclen, destlen); case BTRFS_COMPRESS_LZO: return lzo_decompress(ws, data_in, dest_page, - start_byte, srclen, destlen); + dest_pgoff, srclen, destlen); case BTRFS_COMPRESS_ZSTD: return zstd_decompress(ws, data_in, dest_page, - start_byte, srclen, destlen); + dest_pgoff, srclen, destlen); case BTRFS_COMPRESS_NONE: default: /* @@ -1037,14 +1037,23 @@ static int btrfs_decompress_bio(struct compressed_bio *cb) * start_byte tells us the offset into the compressed data we're interested in */ int btrfs_decompress(int type, const u8 *data_in, struct page *dest_page, - unsigned long start_byte, size_t srclen, size_t destlen) + unsigned long dest_pgoff, size_t srclen, size_t destlen) { + struct btrfs_fs_info *fs_info = btrfs_sb(dest_page->mapping->host->i_sb); struct list_head *workspace; + const u32 sectorsize = fs_info->sectorsize; int ret; + /* + * The full destination page range should not exceed the page size. + * And the @destlen should not exceed sectorsize, as this is only called for + * inline file extents, which should not exceed sectorsize. 
+ */ + ASSERT(dest_pgoff + destlen <= PAGE_SIZE && destlen <= sectorsize); + workspace = get_workspace(type, 0); ret = compression_decompress(type, workspace, data_in, dest_page, - start_byte, srclen, destlen); + dest_pgoff, srclen, destlen); put_workspace(type, workspace); return ret; diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index 93cc92974deee4cebb4fd25d38118f2c046e1840..afd7e50d073d4ac743c924b70e7e1734af2f6ffc 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -148,7 +148,7 @@ int zlib_compress_pages(struct list_head *ws, struct address_space *mapping, unsigned long *total_in, unsigned long *total_out); int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb); int zlib_decompress(struct list_head *ws, const u8 *data_in, - struct page *dest_page, unsigned long start_byte, size_t srclen, + struct page *dest_page, unsigned long dest_pgoff, size_t srclen, size_t destlen); struct list_head *zlib_alloc_workspace(unsigned int level); void zlib_free_workspace(struct list_head *ws); @@ -159,7 +159,7 @@ int lzo_compress_pages(struct list_head *ws, struct address_space *mapping, unsigned long *total_in, unsigned long *total_out); int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb); int lzo_decompress(struct list_head *ws, const u8 *data_in, - struct page *dest_page, unsigned long start_byte, size_t srclen, + struct page *dest_page, unsigned long dest_pgoff, size_t srclen, size_t destlen); struct list_head *lzo_alloc_workspace(unsigned int level); void lzo_free_workspace(struct list_head *ws); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f396aba92c579641d1cce38b48e7e7cd4febc510..8e8cc11112772dfd020217e30d74fe138c3151ca 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1260,7 +1260,8 @@ static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len, u64 bytes_left, end; u64 aligned_start = ALIGN(start, 1 << SECTOR_SHIFT); - if (WARN_ON(start != 
aligned_start)) { + /* Adjust the range to be aligned to 512B sectors if necessary. */ + if (start != aligned_start) { len -= aligned_start - start; len = round_down(len, 1 << SECTOR_SHIFT); start = aligned_start; @@ -4298,6 +4299,42 @@ static int prepare_allocation_clustered(struct btrfs_fs_info *fs_info, return 0; } +static int prepare_allocation_zoned(struct btrfs_fs_info *fs_info, + struct find_free_extent_ctl *ffe_ctl) +{ + if (ffe_ctl->for_treelog) { + spin_lock(&fs_info->treelog_bg_lock); + if (fs_info->treelog_bg) + ffe_ctl->hint_byte = fs_info->treelog_bg; + spin_unlock(&fs_info->treelog_bg_lock); + } else if (ffe_ctl->for_data_reloc) { + spin_lock(&fs_info->relocation_bg_lock); + if (fs_info->data_reloc_bg) + ffe_ctl->hint_byte = fs_info->data_reloc_bg; + spin_unlock(&fs_info->relocation_bg_lock); + } else if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA) { + struct btrfs_block_group *block_group; + + spin_lock(&fs_info->zone_active_bgs_lock); + list_for_each_entry(block_group, &fs_info->zone_active_bgs, active_bg_list) { + /* + * No lock is OK here because avail is monotonically
+ */ + u64 avail = block_group->zone_capacity - block_group->alloc_offset; + + if (block_group_bits(block_group, ffe_ctl->flags) && + avail >= ffe_ctl->num_bytes) { + ffe_ctl->hint_byte = block_group->start; + break; + } + } + spin_unlock(&fs_info->zone_active_bgs_lock); + } + + return 0; +} + static int prepare_allocation(struct btrfs_fs_info *fs_info, struct find_free_extent_ctl *ffe_ctl, struct btrfs_space_info *space_info, @@ -4308,19 +4345,7 @@ static int prepare_allocation(struct btrfs_fs_info *fs_info, return prepare_allocation_clustered(fs_info, ffe_ctl, space_info, ins); case BTRFS_EXTENT_ALLOC_ZONED: - if (ffe_ctl->for_treelog) { - spin_lock(&fs_info->treelog_bg_lock); - if (fs_info->treelog_bg) - ffe_ctl->hint_byte = fs_info->treelog_bg; - spin_unlock(&fs_info->treelog_bg_lock); - } - if (ffe_ctl->for_data_reloc) { - spin_lock(&fs_info->relocation_bg_lock); - if (fs_info->data_reloc_bg) - ffe_ctl->hint_byte = fs_info->data_reloc_bg; - spin_unlock(&fs_info->relocation_bg_lock); - } - return 0; + return prepare_allocation_zoned(fs_info, ffe_ctl); default: BUG(); } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 809b11472a806c92ef9ad4454d354a9460a51b7b..1eb93d3962aac4608cda0255ea31d7e53dbc8da2 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4458,6 +4458,8 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry) u64 root_flags; int ret; + down_write(&fs_info->subvol_sem); + /* * Don't allow to delete a subvolume with send in progress. 
This is * inside the inode lock so the error handling that has to drop the bit @@ -4469,25 +4471,25 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry) btrfs_warn(fs_info, "attempt to delete subvolume %llu during send", dest->root_key.objectid); - return -EPERM; + ret = -EPERM; + goto out_up_write; } if (atomic_read(&dest->nr_swapfiles)) { spin_unlock(&dest->root_item_lock); btrfs_warn(fs_info, "attempt to delete subvolume %llu with active swapfile", root->root_key.objectid); - return -EPERM; + ret = -EPERM; + goto out_up_write; } root_flags = btrfs_root_flags(&dest->root_item); btrfs_set_root_flags(&dest->root_item, root_flags | BTRFS_ROOT_SUBVOL_DEAD); spin_unlock(&dest->root_item_lock); - down_write(&fs_info->subvol_sem); - ret = may_destroy_subvol(dest); if (ret) - goto out_up_write; + goto out_undead; btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP); /* @@ -4497,7 +4499,7 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry) */ ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true); if (ret) - goto out_up_write; + goto out_undead; trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { @@ -4563,15 +4565,17 @@ out_end_trans: inode->i_flags |= S_DEAD; out_release: btrfs_subvolume_release_metadata(root, &block_rsv); -out_up_write: - up_write(&fs_info->subvol_sem); +out_undead: if (ret) { spin_lock(&dest->root_item_lock); root_flags = btrfs_root_flags(&dest->root_item); btrfs_set_root_flags(&dest->root_item, root_flags & ~BTRFS_ROOT_SUBVOL_DEAD); spin_unlock(&dest->root_item_lock); - } else { + } +out_up_write: + up_write(&fs_info->subvol_sem); + if (!ret) { d_invalidate(dentry); btrfs_prune_dentries(dest); ASSERT(dest->send_in_progress == 0); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 41b479861b3c767bb582920db56ea442c8f7f381..dfed9dd9c2d75b8205531b030c220b42820e77ce 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -790,6 +790,9 @@ static int create_snapshot(struct 
btrfs_root *root, struct inode *dir, return -EOPNOTSUPP; } + if (btrfs_root_refs(&root->root_item) == 0) + return -ENOENT; + if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) return -EINVAL; @@ -2608,6 +2611,10 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) ret = -EFAULT; goto out; } + if (range.flags & ~BTRFS_DEFRAG_RANGE_FLAGS_SUPP) { + ret = -EOPNOTSUPP; + goto out; + } /* compression requires us to start the IO */ if ((range.flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { range.flags |= BTRFS_DEFRAG_RANGE_START_IO; diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c index 1131d5a29d612ee50e14c488b1812a0657c259f1..e43bc0fdc74ec9b0224568928b31e0ca10c77805 100644 --- a/fs/btrfs/lzo.c +++ b/fs/btrfs/lzo.c @@ -425,16 +425,16 @@ int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb) } int lzo_decompress(struct list_head *ws, const u8 *data_in, - struct page *dest_page, unsigned long start_byte, size_t srclen, + struct page *dest_page, unsigned long dest_pgoff, size_t srclen, size_t destlen) { struct workspace *workspace = list_entry(ws, struct workspace, list); + struct btrfs_fs_info *fs_info = btrfs_sb(dest_page->mapping->host->i_sb); + const u32 sectorsize = fs_info->sectorsize; size_t in_len; size_t out_len; size_t max_segment_len = WORKSPACE_BUF_LENGTH; int ret = 0; - char *kaddr; - unsigned long bytes; if (srclen < LZO_LEN || srclen > max_segment_len + LZO_LEN * 2) return -EUCLEAN; @@ -451,7 +451,7 @@ int lzo_decompress(struct list_head *ws, const u8 *data_in, } data_in += LZO_LEN; - out_len = PAGE_SIZE; + out_len = sectorsize; ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len); if (ret != LZO_E_OK) { pr_warn("BTRFS: decompress failed!\n"); @@ -459,29 +459,13 @@ int lzo_decompress(struct list_head *ws, const u8 *data_in, goto out; } - if (out_len < start_byte) { + ASSERT(out_len <= sectorsize); + memcpy_to_page(dest_page, dest_pgoff, workspace->buf, out_len); + /* Early end, considered as an error. 
*/ + if (unlikely(out_len < destlen)) { ret = -EIO; - goto out; + memzero_page(dest_page, dest_pgoff + out_len, destlen - out_len); } - - /* - * the caller is already checking against PAGE_SIZE, but lets - * move this check closer to the memcpy/memset - */ - destlen = min_t(unsigned long, destlen, PAGE_SIZE); - bytes = min_t(unsigned long, destlen, out_len - start_byte); - - kaddr = kmap_local_page(dest_page); - memcpy(kaddr, workspace->buf + start_byte, bytes); - - /* - * btrfs_getblock is doing a zero on the tail of the page too, - * but this will cover anything missing from the decompressed - * data. - */ - if (bytes < destlen) - memset(kaddr+bytes, 0, destlen-bytes); - kunmap_local(kaddr); out: return ret; } diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c index 6486f0d7e9931b4fafbc03ddc5ddca0863679d7a..8c4fc98ca9ce7de055841a06e43863eeb6b960e0 100644 --- a/fs/btrfs/ref-verify.c +++ b/fs/btrfs/ref-verify.c @@ -889,8 +889,10 @@ int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info, out_unlock: spin_unlock(&fs_info->ref_verify_lock); out: - if (ret) + if (ret) { + btrfs_free_ref_cache(fs_info); btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY); + } return ret; } @@ -1021,8 +1023,8 @@ int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info) } } if (ret) { - btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY); btrfs_free_ref_cache(fs_info); + btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY); } btrfs_free_path(path); return ret; diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index a01807cbd4d44e4127c798e470cef51d8bfa13e6..0123d272892373b3465c942e75e181d3bc77e681 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1098,12 +1098,22 @@ out: static void scrub_read_endio(struct btrfs_bio *bbio) { struct scrub_stripe *stripe = bbio->private; + struct bio_vec *bvec; + int sector_nr = calc_sector_number(stripe, bio_first_bvec_all(&bbio->bio)); + int num_sectors; + u32 bio_size = 0; + int i; + + ASSERT(sector_nr < stripe->nr_sectors); + bio_for_each_bvec_all(bvec, 
&bbio->bio, i) + bio_size += bvec->bv_len; + num_sectors = bio_size >> stripe->bg->fs_info->sectorsize_bits; if (bbio->bio.bi_status) { - bitmap_set(&stripe->io_error_bitmap, 0, stripe->nr_sectors); - bitmap_set(&stripe->error_bitmap, 0, stripe->nr_sectors); + bitmap_set(&stripe->io_error_bitmap, sector_nr, num_sectors); + bitmap_set(&stripe->error_bitmap, sector_nr, num_sectors); } else { - bitmap_clear(&stripe->io_error_bitmap, 0, stripe->nr_sectors); + bitmap_clear(&stripe->io_error_bitmap, sector_nr, num_sectors); } bio_put(&bbio->bio); if (atomic_dec_and_test(&stripe->pending_io)) { @@ -1636,6 +1646,9 @@ static void scrub_submit_extent_sector_read(struct scrub_ctx *sctx, { struct btrfs_fs_info *fs_info = stripe->bg->fs_info; struct btrfs_bio *bbio = NULL; + unsigned int nr_sectors = min(BTRFS_STRIPE_LEN, stripe->bg->start + + stripe->bg->length - stripe->logical) >> + fs_info->sectorsize_bits; u64 stripe_len = BTRFS_STRIPE_LEN; int mirror = stripe->mirror_num; int i; @@ -1646,6 +1659,10 @@ static void scrub_submit_extent_sector_read(struct scrub_ctx *sctx, struct page *page = scrub_stripe_get_page(stripe, i); unsigned int pgoff = scrub_stripe_get_page_offset(stripe, i); + /* We're beyond the chunk boundary, no need to read anymore. */ + if (i >= nr_sectors) + break; + /* The current sector cannot be merged, submit the bio. */ if (bbio && ((i > 0 && @@ -1701,6 +1718,9 @@ static void scrub_submit_initial_read(struct scrub_ctx *sctx, { struct btrfs_fs_info *fs_info = sctx->fs_info; struct btrfs_bio *bbio; + unsigned int nr_sectors = min(BTRFS_STRIPE_LEN, stripe->bg->start + + stripe->bg->length - stripe->logical) >> + fs_info->sectorsize_bits; int mirror = stripe->mirror_num; ASSERT(stripe->bg); @@ -1715,14 +1735,16 @@ static void scrub_submit_initial_read(struct scrub_ctx *sctx, bbio = btrfs_bio_alloc(SCRUB_STRIPE_PAGES, REQ_OP_READ, fs_info, scrub_read_endio, stripe); - /* Read the whole stripe. 
*/ bbio->bio.bi_iter.bi_sector = stripe->logical >> SECTOR_SHIFT; - for (int i = 0; i < BTRFS_STRIPE_LEN >> PAGE_SHIFT; i++) { + /* Read the whole range inside the chunk boundary. */ + for (unsigned int cur = 0; cur < nr_sectors; cur++) { + struct page *page = scrub_stripe_get_page(stripe, cur); + unsigned int pgoff = scrub_stripe_get_page_offset(stripe, cur); int ret; - ret = bio_add_page(&bbio->bio, stripe->pages[i], PAGE_SIZE, 0); + ret = bio_add_page(&bbio->bio, page, fs_info->sectorsize, pgoff); /* We should have allocated enough bio vectors. */ - ASSERT(ret == PAGE_SIZE); + ASSERT(ret == fs_info->sectorsize); } atomic_inc(&stripe->pending_io); diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 4e36550618e580044fb0b0d573ddfee196cdca5d..2d7519a6ce72d3c58e70b1cb567258e642604a87 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -8205,8 +8205,8 @@ long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg) goto out; } - sctx->clone_roots = kvcalloc(sizeof(*sctx->clone_roots), - arg->clone_sources_count + 1, + sctx->clone_roots = kvcalloc(arg->clone_sources_count + 1, + sizeof(*sctx->clone_roots), GFP_KERNEL); if (!sctx->clone_roots) { ret = -ENOMEM; diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c index 93511d54abf8280bc6778a17b5fa75a28d3585c1..0e49dab8dad2480243f4d32e6ee934c0f2b35b67 100644 --- a/fs/btrfs/subpage.c +++ b/fs/btrfs/subpage.c @@ -475,7 +475,8 @@ void btrfs_subpage_set_writeback(const struct btrfs_fs_info *fs_info, spin_lock_irqsave(&subpage->lock, flags); bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); - folio_start_writeback(folio); + if (!folio_test_writeback(folio)) + folio_start_writeback(folio); spin_unlock_irqrestore(&subpage->lock, flags); } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 896acfda17895150ff501960dd72f084c542301e..101f786963d4d7712baab28c912226fb741c0c9b 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1457,6 +1457,14 @@ static int btrfs_reconfigure(struct 
fs_context *fc) btrfs_info_to_ctx(fs_info, &old_ctx); + /* + * This is our "bind mount" trick, we don't want to allow the user to do + * anything other than mount a different ro/rw and a different subvol, + * all of the mount options should be maintained. + */ + if (mount_reconfigure) + ctx->mount_opt = old_ctx.mount_opt; + sync_filesystem(sb); set_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state); diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 50fdc69fdddf9d26014a65ed73c13fe694d05e4b..6eccf8496486c0630cd85c90ca813170f08e6eb5 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -1436,7 +1436,7 @@ static int check_extent_item(struct extent_buffer *leaf, if (unlikely(ptr + btrfs_extent_inline_ref_size(inline_type) > end)) { extent_err(leaf, slot, "inline ref item overflows extent item, ptr %lu iref size %u end %lu", - ptr, inline_type, end); + ptr, btrfs_extent_inline_ref_size(inline_type), end); return -EUCLEAN; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 4c32497311d2ff6ba28fc9ac5ba8dd5b8f835a66..d67785be2c778c6611d639dcbdcffffec4c513c2 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3087,7 +3087,6 @@ struct btrfs_chunk_map *btrfs_get_chunk_map(struct btrfs_fs_info *fs_info, map = btrfs_find_chunk_map(fs_info, logical, length); if (unlikely(!map)) { - read_unlock(&fs_info->mapping_tree_lock); btrfs_crit(fs_info, "unable to find chunk map for logical %llu length %llu", logical, length); @@ -3095,7 +3094,6 @@ struct btrfs_chunk_map *btrfs_get_chunk_map(struct btrfs_fs_info *fs_info, } if (unlikely(map->start > logical || map->start + map->chunk_len <= logical)) { - read_unlock(&fs_info->mapping_tree_lock); btrfs_crit(fs_info, "found a bad chunk map, wanted %llu-%llu, found %llu-%llu", logical, logical + length, map->start, diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index 36cf1f0e338e2f59d736aaeb1001e00e8eaddaa3..8da66ea699e8febfdef6cc189c5917d22628265d 100644 --- a/fs/btrfs/zlib.c +++ 
b/fs/btrfs/zlib.c @@ -354,18 +354,13 @@ done: } int zlib_decompress(struct list_head *ws, const u8 *data_in, - struct page *dest_page, unsigned long start_byte, size_t srclen, + struct page *dest_page, unsigned long dest_pgoff, size_t srclen, size_t destlen) { struct workspace *workspace = list_entry(ws, struct workspace, list); int ret = 0; int wbits = MAX_WBITS; - unsigned long bytes_left; - unsigned long total_out = 0; - unsigned long pg_offset = 0; - - destlen = min_t(unsigned long, destlen, PAGE_SIZE); - bytes_left = destlen; + unsigned long to_copy; workspace->strm.next_in = data_in; workspace->strm.avail_in = srclen; @@ -390,60 +385,30 @@ int zlib_decompress(struct list_head *ws, const u8 *data_in, return -EIO; } - while (bytes_left > 0) { - unsigned long buf_start; - unsigned long buf_offset; - unsigned long bytes; - - ret = zlib_inflate(&workspace->strm, Z_NO_FLUSH); - if (ret != Z_OK && ret != Z_STREAM_END) - break; - - buf_start = total_out; - total_out = workspace->strm.total_out; - - if (total_out == buf_start) { - ret = -EIO; - break; - } - - if (total_out <= start_byte) - goto next; - - if (total_out > start_byte && buf_start < start_byte) - buf_offset = start_byte - buf_start; - else - buf_offset = 0; - - bytes = min(PAGE_SIZE - pg_offset, - PAGE_SIZE - (buf_offset % PAGE_SIZE)); - bytes = min(bytes, bytes_left); + /* + * Everything (in/out buf) should be at most one sector, there should + * be no need to switch any input/output buffer. 
+ */ + ret = zlib_inflate(&workspace->strm, Z_FINISH); + to_copy = min(workspace->strm.total_out, destlen); + if (ret != Z_STREAM_END) + goto out; - memcpy_to_page(dest_page, pg_offset, - workspace->buf + buf_offset, bytes); + memcpy_to_page(dest_page, dest_pgoff, workspace->buf, to_copy); - pg_offset += bytes; - bytes_left -= bytes; -next: - workspace->strm.next_out = workspace->buf; - workspace->strm.avail_out = workspace->buf_size; - } - - if (ret != Z_STREAM_END && bytes_left != 0) +out: + if (unlikely(to_copy != destlen)) { + pr_warn_ratelimited("BTRFS: infalte failed, decompressed=%lu expected=%zu\n", + to_copy, destlen); ret = -EIO; - else + } else { ret = 0; + } zlib_inflateEnd(&workspace->strm); - /* - * this should only happen if zlib returned fewer bytes than we - * expected. btrfs_get_block is responsible for zeroing from the - * end of the inline extent (destlen) to the end of the page - */ - if (pg_offset < destlen) { - memzero_page(dest_page, pg_offset, destlen - pg_offset); - } + if (unlikely(to_copy < destlen)) + memzero_page(dest_page, dest_pgoff + to_copy, destlen - to_copy); return ret; } diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 5bd76813b23f065fdf670bf8fe3fbd59ee0c88d9..168af9d000d168324fcc8355781517ddeedeefd1 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -2055,6 +2055,7 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group) map = block_group->physical_map; + spin_lock(&fs_info->zone_active_bgs_lock); spin_lock(&block_group->lock); if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags)) { ret = true; @@ -2067,7 +2068,6 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group) goto out_unlock; } - spin_lock(&fs_info->zone_active_bgs_lock); for (i = 0; i < map->num_stripes; i++) { struct btrfs_zoned_device_info *zinfo; int reserved = 0; @@ -2087,20 +2087,17 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group) */ if (atomic_read(&zinfo->active_zones_left) <= reserved) { 
ret = false; - spin_unlock(&fs_info->zone_active_bgs_lock); goto out_unlock; } if (!btrfs_dev_set_active_zone(device, physical)) { /* Cannot activate the zone */ ret = false; - spin_unlock(&fs_info->zone_active_bgs_lock); goto out_unlock; } if (!is_data) zinfo->reserved_active_zones--; } - spin_unlock(&fs_info->zone_active_bgs_lock); /* Successfully activated all the zones */ set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags); @@ -2108,8 +2105,6 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group) /* For the active block group list */ btrfs_get_block_group(block_group); - - spin_lock(&fs_info->zone_active_bgs_lock); list_add_tail(&block_group->active_bg_list, &fs_info->zone_active_bgs); spin_unlock(&fs_info->zone_active_bgs_lock); @@ -2117,6 +2112,7 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group) out_unlock: spin_unlock(&block_group->lock); + spin_unlock(&fs_info->zone_active_bgs_lock); return ret; } diff --git a/fs/cachefiles/Kconfig b/fs/cachefiles/Kconfig index 8df715640a48f32cae9b1e104e2bd55cc99f25fd..c5a070550ee334f69b57cb2b1ed3af7ceaac3b4f 100644 --- a/fs/cachefiles/Kconfig +++ b/fs/cachefiles/Kconfig @@ -2,7 +2,7 @@ config CACHEFILES tristate "Filesystem caching on files" - depends on FSCACHE && BLOCK + depends on NETFS_SUPPORT && FSCACHE && BLOCK help This permits use of a mounted filesystem as a cache for other filesystems - primarily networking filesystems - thus allowing fast diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index 4a87c9d714a9498b80599d15461c48f0ea1c3f68..d33169f0018b103a7ad30ed20b258869e740e556 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -246,7 +246,7 @@ extern bool cachefiles_begin_operation(struct netfs_cache_resources *cres, enum fscache_want_state want_state); extern int __cachefiles_prepare_write(struct cachefiles_object *object, struct file *file, - loff_t *_start, size_t *_len, + loff_t *_start, size_t *_len, size_t upper_len, bool 
no_space_allocated_yet); extern int __cachefiles_write(struct cachefiles_object *object, struct file *file, diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c index 5857241c59181674ef8dafcfc9b6216d65db75a0..1d685357e67fc71ffc2be73513b00f7efd8ee906 100644 --- a/fs/cachefiles/io.c +++ b/fs/cachefiles/io.c @@ -517,18 +517,26 @@ cachefiles_prepare_ondemand_read(struct netfs_cache_resources *cres, */ int __cachefiles_prepare_write(struct cachefiles_object *object, struct file *file, - loff_t *_start, size_t *_len, + loff_t *_start, size_t *_len, size_t upper_len, bool no_space_allocated_yet) { struct cachefiles_cache *cache = object->volume->cache; loff_t start = *_start, pos; - size_t len = *_len, down; + size_t len = *_len; int ret; /* Round to DIO size */ - down = start - round_down(start, PAGE_SIZE); - *_start = start - down; - *_len = round_up(down + len, PAGE_SIZE); + start = round_down(*_start, PAGE_SIZE); + if (start != *_start || *_len > upper_len) { + /* Probably asked to cache a streaming write written into the + * pagecache when the cookie was temporarily out of service to + * culling. 
+ */ + fscache_count_dio_misfit(); + return -ENOBUFS; + } + + *_len = round_up(len, PAGE_SIZE); /* We need to work out whether there's sufficient disk space to perform * the write - but we can skip that check if we have space already @@ -539,7 +547,7 @@ int __cachefiles_prepare_write(struct cachefiles_object *object, pos = cachefiles_inject_read_error(); if (pos == 0) - pos = vfs_llseek(file, *_start, SEEK_DATA); + pos = vfs_llseek(file, start, SEEK_DATA); if (pos < 0 && pos >= (loff_t)-MAX_ERRNO) { if (pos == -ENXIO) goto check_space; /* Unallocated tail */ @@ -547,7 +555,7 @@ int __cachefiles_prepare_write(struct cachefiles_object *object, cachefiles_trace_seek_error); return pos; } - if ((u64)pos >= (u64)*_start + *_len) + if ((u64)pos >= (u64)start + *_len) goto check_space; /* Unallocated region */ /* We have a block that's at least partially filled - if we're low on @@ -560,13 +568,13 @@ int __cachefiles_prepare_write(struct cachefiles_object *object, pos = cachefiles_inject_read_error(); if (pos == 0) - pos = vfs_llseek(file, *_start, SEEK_HOLE); + pos = vfs_llseek(file, start, SEEK_HOLE); if (pos < 0 && pos >= (loff_t)-MAX_ERRNO) { trace_cachefiles_io_error(object, file_inode(file), pos, cachefiles_trace_seek_error); return pos; } - if ((u64)pos >= (u64)*_start + *_len) + if ((u64)pos >= (u64)start + *_len) return 0; /* Fully allocated */ /* Partially allocated, but insufficient space: cull. 
*/ @@ -574,7 +582,7 @@ int __cachefiles_prepare_write(struct cachefiles_object *object, ret = cachefiles_inject_remove_error(); if (ret == 0) ret = vfs_fallocate(file, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, - *_start, *_len); + start, *_len); if (ret < 0) { trace_cachefiles_io_error(object, file_inode(file), ret, cachefiles_trace_fallocate_error); @@ -591,8 +599,8 @@ check_space: } static int cachefiles_prepare_write(struct netfs_cache_resources *cres, - loff_t *_start, size_t *_len, loff_t i_size, - bool no_space_allocated_yet) + loff_t *_start, size_t *_len, size_t upper_len, + loff_t i_size, bool no_space_allocated_yet) { struct cachefiles_object *object = cachefiles_cres_object(cres); struct cachefiles_cache *cache = object->volume->cache; @@ -608,7 +616,7 @@ static int cachefiles_prepare_write(struct netfs_cache_resources *cres, cachefiles_begin_secure(cache, &saved_cred); ret = __cachefiles_prepare_write(object, cachefiles_cres_file(cres), - _start, _len, + _start, _len, upper_len, no_space_allocated_yet); cachefiles_end_secure(cache, saved_cred); return ret; diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c index b8fbbb1961bbcefc158fd32306d3a6abd63e607c..5fd74ec60befc6cb192e8102a14e87de2b45bb87 100644 --- a/fs/cachefiles/ondemand.c +++ b/fs/cachefiles/ondemand.c @@ -50,7 +50,7 @@ static ssize_t cachefiles_ondemand_fd_write_iter(struct kiocb *kiocb, return -ENOBUFS; cachefiles_begin_secure(cache, &saved_cred); - ret = __cachefiles_prepare_write(object, file, &pos, &len, true); + ret = __cachefiles_prepare_write(object, file, &pos, &len, len, true); cachefiles_end_secure(cache, saved_cred); if (ret < 0) return ret; diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig index 94df854147d3597e0b6b7655e5c68e0d87334543..7249d70e1a43fade3a72728df628274d25f7e9c9 100644 --- a/fs/ceph/Kconfig +++ b/fs/ceph/Kconfig @@ -7,6 +7,7 @@ config CEPH_FS select CRYPTO_AES select CRYPTO select NETFS_SUPPORT + select FS_ENCRYPTION_ALGS if FS_ENCRYPTION default n help 
Choose Y or M here to include support for mounting the diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 13af429ab030b6232c197c53659d982ac8bdc43a..1340d77124ae4db09c3b96548acdf1cd8a6c3fb0 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -159,27 +159,7 @@ static void ceph_invalidate_folio(struct folio *folio, size_t offset, ceph_put_snap_context(snapc); } - folio_wait_fscache(folio); -} - -static bool ceph_release_folio(struct folio *folio, gfp_t gfp) -{ - struct inode *inode = folio->mapping->host; - struct ceph_client *cl = ceph_inode_to_client(inode); - - doutc(cl, "%llx.%llx idx %lu (%sdirty)\n", ceph_vinop(inode), - folio->index, folio_test_dirty(folio) ? "" : "not "); - - if (folio_test_private(folio)) - return false; - - if (folio_test_fscache(folio)) { - if (current_is_kswapd() || !(gfp & __GFP_FS)) - return false; - folio_wait_fscache(folio); - } - ceph_fscache_note_page_release(inode); - return true; + netfs_invalidate_folio(folio, offset, length); } static void ceph_netfs_expand_readahead(struct netfs_io_request *rreq) @@ -357,6 +337,7 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq) u64 len = subreq->len; bool sparse = IS_ENCRYPTED(inode) || ceph_test_mount_opt(fsc, SPARSEREAD); u64 off = subreq->start; + int extent_cnt; if (ceph_inode_is_shutdown(inode)) { err = -EIO; @@ -370,8 +351,8 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq) req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino, off, &len, 0, 1, sparse ? 
CEPH_OSD_OP_SPARSE_READ : CEPH_OSD_OP_READ, - CEPH_OSD_FLAG_READ | fsc->client->osdc.client->options->read_from_replica, - NULL, ci->i_truncate_seq, ci->i_truncate_size, false); + CEPH_OSD_FLAG_READ, NULL, ci->i_truncate_seq, + ci->i_truncate_size, false); if (IS_ERR(req)) { err = PTR_ERR(req); req = NULL; @@ -379,7 +360,8 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq) } if (sparse) { - err = ceph_alloc_sparse_ext_map(&req->r_ops[0]); + extent_cnt = __ceph_sparse_read_ext_count(inode, len); + err = ceph_alloc_sparse_ext_map(&req->r_ops[0], extent_cnt); if (err) goto out; } @@ -509,7 +491,6 @@ static void ceph_netfs_free_request(struct netfs_io_request *rreq) const struct netfs_request_ops ceph_netfs_ops = { .init_request = ceph_init_request, .free_request = ceph_netfs_free_request, - .begin_cache_operation = ceph_begin_cache_operation, .issue_read = ceph_netfs_issue_read, .expand_readahead = ceph_netfs_expand_readahead, .clamp_length = ceph_netfs_clamp_length, @@ -1586,7 +1567,7 @@ const struct address_space_operations ceph_aops = { .write_end = ceph_write_end, .dirty_folio = ceph_dirty_folio, .invalidate_folio = ceph_invalidate_folio, - .release_folio = ceph_release_folio, + .release_folio = netfs_release_folio, .direct_IO = noop_direct_IO, }; diff --git a/fs/ceph/cache.h b/fs/ceph/cache.h index dc502daac49ab580380deca8f969b3f648a4c299..20efac020394eeb3608d6a2200e8a08591f6e7ba 100644 --- a/fs/ceph/cache.h +++ b/fs/ceph/cache.h @@ -43,38 +43,19 @@ static inline void ceph_fscache_resize(struct inode *inode, loff_t to) } } -static inline void ceph_fscache_unpin_writeback(struct inode *inode, +static inline int ceph_fscache_unpin_writeback(struct inode *inode, struct writeback_control *wbc) { - fscache_unpin_writeback(wbc, ceph_fscache_cookie(ceph_inode(inode))); + return netfs_unpin_writeback(inode, wbc); } -static inline int ceph_fscache_dirty_folio(struct address_space *mapping, - struct folio *folio) -{ - struct ceph_inode_info *ci = 
ceph_inode(mapping->host); - - return fscache_dirty_folio(mapping, folio, ceph_fscache_cookie(ci)); -} - -static inline int ceph_begin_cache_operation(struct netfs_io_request *rreq) -{ - struct fscache_cookie *cookie = ceph_fscache_cookie(ceph_inode(rreq->inode)); - - return fscache_begin_read_operation(&rreq->cache_resources, cookie); -} +#define ceph_fscache_dirty_folio netfs_dirty_folio static inline bool ceph_is_cache_enabled(struct inode *inode) { return fscache_cookie_enabled(ceph_fscache_cookie(ceph_inode(inode))); } -static inline void ceph_fscache_note_page_release(struct inode *inode) -{ - struct ceph_inode_info *ci = ceph_inode(inode); - - fscache_note_page_release(ceph_fscache_cookie(ci)); -} #else /* CONFIG_CEPH_FSCACHE */ static inline int ceph_fscache_register_fs(struct ceph_fs_client* fsc, struct fs_context *fc) @@ -119,30 +100,18 @@ static inline void ceph_fscache_resize(struct inode *inode, loff_t to) { } -static inline void ceph_fscache_unpin_writeback(struct inode *inode, - struct writeback_control *wbc) +static inline int ceph_fscache_unpin_writeback(struct inode *inode, + struct writeback_control *wbc) { + return 0; } -static inline int ceph_fscache_dirty_folio(struct address_space *mapping, - struct folio *folio) -{ - return filemap_dirty_folio(mapping, folio); -} +#define ceph_fscache_dirty_folio filemap_dirty_folio static inline bool ceph_is_cache_enabled(struct inode *inode) { return false; } - -static inline int ceph_begin_cache_operation(struct netfs_io_request *rreq) -{ - return -ENOBUFS; -} - -static inline void ceph_fscache_note_page_release(struct inode *inode) -{ -} #endif /* CONFIG_CEPH_FSCACHE */ #endif diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 2c0b8dc3dd0d80314b04c0717501f066079b97eb..9c02f328c966cbdd12b8af17d7ddb9d5bb19ea38 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -4887,13 +4887,15 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry, struct inode *dir, int mds, int drop, int unless) { - struct 
dentry *parent = NULL; struct ceph_mds_request_release *rel = *p; struct ceph_dentry_info *di = ceph_dentry(dentry); struct ceph_client *cl; int force = 0; int ret; + /* This shouldn't happen */ + BUG_ON(!dir); + /* * force an record for the directory caps if we have a dentry lease. * this is racy (can't take i_ceph_lock and d_lock together), but it @@ -4903,14 +4905,9 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry, spin_lock(&dentry->d_lock); if (di->lease_session && di->lease_session->s_mds == mds) force = 1; - if (!dir) { - parent = dget(dentry->d_parent); - dir = d_inode(parent); - } spin_unlock(&dentry->d_lock); ret = ceph_encode_inode_release(p, dir, mds, drop, unless, force); - dput(parent); cl = ceph_inode_to_client(dir); spin_lock(&dentry->d_lock); diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 678596684596f71d5ad730713eb7aae0431d7f4e..0e9f56eaba1e693d22142487e79b433a0213f759 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1593,10 +1593,12 @@ struct ceph_lease_walk_control { unsigned long dir_lease_ttl; }; +static int __dir_lease_check(const struct dentry *, struct ceph_lease_walk_control *); +static int __dentry_lease_check(const struct dentry *); + static unsigned long __dentry_leases_walk(struct ceph_mds_client *mdsc, - struct ceph_lease_walk_control *lwc, - int (*check)(struct dentry*, void*)) + struct ceph_lease_walk_control *lwc) { struct ceph_dentry_info *di, *tmp; struct dentry *dentry, *last = NULL; @@ -1624,7 +1626,10 @@ __dentry_leases_walk(struct ceph_mds_client *mdsc, goto next; } - ret = check(dentry, lwc); + if (lwc->dir_lease) + ret = __dir_lease_check(dentry, lwc); + else + ret = __dentry_lease_check(dentry); if (ret & TOUCH) { /* move it into tail of dir lease list */ __dentry_dir_lease_touch(mdsc, di); @@ -1681,7 +1686,7 @@ next: return freed; } -static int __dentry_lease_check(struct dentry *dentry, void *arg) +static int __dentry_lease_check(const struct dentry *dentry) { struct ceph_dentry_info *di = 
ceph_dentry(dentry); int ret; @@ -1696,9 +1701,9 @@ static int __dentry_lease_check(struct dentry *dentry, void *arg) return DELETE; } -static int __dir_lease_check(struct dentry *dentry, void *arg) +static int __dir_lease_check(const struct dentry *dentry, + struct ceph_lease_walk_control *lwc) { - struct ceph_lease_walk_control *lwc = arg; struct ceph_dentry_info *di = ceph_dentry(dentry); int ret = __dir_lease_try_check(dentry); @@ -1737,7 +1742,7 @@ int ceph_trim_dentries(struct ceph_mds_client *mdsc) lwc.dir_lease = false; lwc.nr_to_scan = CEPH_CAPS_PER_RELEASE * 2; - freed = __dentry_leases_walk(mdsc, &lwc, __dentry_lease_check); + freed = __dentry_leases_walk(mdsc, &lwc); if (!lwc.nr_to_scan) /* more invalid leases */ return -EAGAIN; @@ -1747,7 +1752,7 @@ int ceph_trim_dentries(struct ceph_mds_client *mdsc) lwc.dir_lease = true; lwc.expire_dir_lease = freed < count; lwc.dir_lease_ttl = mdsc->fsc->mount_options->caps_wanted_delay_max * HZ; - freed +=__dentry_leases_walk(mdsc, &lwc, __dir_lease_check); + freed +=__dentry_leases_walk(mdsc, &lwc); if (!lwc.nr_to_scan) /* more to check */ return -EAGAIN; diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 726af69d4d62cd7341c0c1aefa46fd553f89ec47..a79f163ae4ed2ce1962289478b348c53a338a8c0 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -286,8 +286,6 @@ static struct dentry *__snapfh_to_dentry(struct super_block *sb, doutc(cl, "%llx.%llx parent %llx hash %x err=%d", vino.ino, vino.snap, sfh->parent_ino, sfh->hash, err); } - if (IS_ERR(inode)) - return ERR_CAST(inode); /* see comments in ceph_get_parent() */ return unlinked ? 
d_obtain_root(inode) : d_obtain_alias(inode); } diff --git a/fs/ceph/file.c b/fs/ceph/file.c index d380d9dad0e018426177110f17c51942b1c8a868..abe8028d95bf4e3e99091d83cf1784f2b9a249e1 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -1029,6 +1029,7 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, struct ceph_osd_req_op *op; u64 read_off = off; u64 read_len = len; + int extent_cnt; /* determine new offset/length if encrypted */ ceph_fscrypt_adjust_off_and_len(inode, &read_off, &read_len); @@ -1068,7 +1069,8 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, op = &req->r_ops[0]; if (sparse) { - ret = ceph_alloc_sparse_ext_map(op); + extent_cnt = __ceph_sparse_read_ext_count(inode, read_len); + ret = ceph_alloc_sparse_ext_map(op, extent_cnt); if (ret) { ceph_osdc_put_request(req); break; @@ -1465,6 +1467,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, ssize_t len; struct ceph_osd_req_op *op; int readop = sparse ? CEPH_OSD_OP_SPARSE_READ : CEPH_OSD_OP_READ; + int extent_cnt; if (write) size = min_t(u64, size, fsc->mount_options->wsize); @@ -1528,7 +1531,8 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, osd_req_op_extent_osd_data_bvecs(req, 0, bvecs, num_pages, len); op = &req->r_ops[0]; if (sparse) { - ret = ceph_alloc_sparse_ext_map(op); + extent_cnt = __ceph_sparse_read_ext_count(inode, size); + ret = ceph_alloc_sparse_ext_map(op, extent_cnt); if (ret) { ceph_osdc_put_request(req); break; diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 0679240f06db924e9aba25052675268885c4bd04..0c25d326afc41d9d4d8ba98d3c6c5976647bb3fe 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -574,7 +574,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb) doutc(fsc->client, "%p\n", &ci->netfs.inode); /* Set parameters for the netfs library */ - netfs_inode_init(&ci->netfs, &ceph_netfs_ops); + netfs_inode_init(&ci->netfs, &ceph_netfs_ops, false); spin_lock_init(&ci->i_ceph_lock); @@ -694,7 +694,7 @@ void 
ceph_evict_inode(struct inode *inode) percpu_counter_dec(&mdsc->metric.total_inodes); truncate_inode_pages_final(&inode->i_data); - if (inode->i_state & I_PINNING_FSCACHE_WB) + if (inode->i_state & I_PINNING_NETFS_WB) ceph_fscache_unuse_cookie(inode, true); clear_inode(inode); diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 02ebfabfc8eef26e7753ea4b7a8e9540e64d35c3..548d1de379f3570b729af9e50b67aaff65e36e14 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1534,7 +1534,8 @@ static int encode_metric_spec(void **p, void *end) * session message, specialization for CEPH_SESSION_REQUEST_OPEN * to include additional client metadata fields. */ -static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u64 seq) +static struct ceph_msg * +create_session_full_msg(struct ceph_mds_client *mdsc, int op, u64 seq) { struct ceph_msg *msg; struct ceph_mds_session_head *h; @@ -1578,6 +1579,9 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 size = METRIC_BYTES(count); extra_bytes += 2 + 4 + 4 + size; + /* flags, mds auth caps and oldest_client_tid */ + extra_bytes += 4 + 4 + 8; + /* Allocate the message */ msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + extra_bytes, GFP_NOFS, false); @@ -1589,16 +1593,16 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 end = p + msg->front.iov_len; h = p; - h->op = cpu_to_le32(CEPH_SESSION_REQUEST_OPEN); + h->op = cpu_to_le32(op); h->seq = cpu_to_le64(seq); /* * Serialize client metadata into waiting buffer space, using * the format that userspace expects for map * - * ClientSession messages with metadata are v4 + * ClientSession messages with metadata are v7 */ - msg->hdr.version = cpu_to_le16(4); + msg->hdr.version = cpu_to_le16(7); msg->hdr.compat_version = cpu_to_le16(1); /* The write pointer, following the session_head structure */ @@ -1634,6 +1638,15 @@ static struct ceph_msg *create_session_open_msg(struct 
ceph_mds_client *mdsc, u6 return ERR_PTR(ret); } + /* version == 5, flags */ + ceph_encode_32(&p, 0); + + /* version == 6, mds auth caps */ + ceph_encode_32(&p, 0); + + /* version == 7, oldest_client_tid */ + ceph_encode_64(&p, mdsc->oldest_tid); + msg->front.iov_len = p - msg->front.iov_base; msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); @@ -1663,7 +1676,8 @@ static int __open_session(struct ceph_mds_client *mdsc, session->s_renew_requested = jiffies; /* send connect message */ - msg = create_session_open_msg(mdsc, session->s_seq); + msg = create_session_full_msg(mdsc, CEPH_SESSION_REQUEST_OPEN, + session->s_seq); if (IS_ERR(msg)) return PTR_ERR(msg); ceph_con_send(&session->s_con, msg); @@ -2028,10 +2042,10 @@ static int send_renew_caps(struct ceph_mds_client *mdsc, doutc(cl, "to mds%d (%s)\n", session->s_mds, ceph_mds_state_name(state)); - msg = ceph_create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS, + msg = create_session_full_msg(mdsc, CEPH_SESSION_REQUEST_RENEWCAPS, ++session->s_renew_seq); - if (!msg) - return -ENOMEM; + if (IS_ERR(msg)) + return PTR_ERR(msg); ceph_con_send(&session->s_con, msg); return 0; } @@ -4128,12 +4142,12 @@ static void handle_session(struct ceph_mds_session *session, pr_info_client(cl, "mds%d reconnect success\n", session->s_mds); + session->s_features = features; if (session->s_state == CEPH_MDS_SESSION_OPEN) { pr_notice_client(cl, "mds%d is already opened\n", session->s_mds); } else { session->s_state = CEPH_MDS_SESSION_OPEN; - session->s_features = features; renewed_caps(mdsc, session, 0); if (test_bit(CEPHFS_FEATURE_METRIC_COLLECT, &session->s_features)) @@ -5870,7 +5884,8 @@ static void mds_peer_reset(struct ceph_connection *con) pr_warn_client(mdsc->fsc->client, "mds%d closed our session\n", s->s_mds); - if (READ_ONCE(mdsc->fsc->mount_state) != CEPH_MOUNT_FENCE_IO) + if (READ_ONCE(mdsc->fsc->mount_state) != CEPH_MOUNT_FENCE_IO && + ceph_mdsmap_get_state(mdsc->mdsmap, s->s_mds) >= CEPH_MDS_STATE_RECONNECT) 
send_mds_reconnect(mdsc, s); } diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c index 9d36c3532de14fc41e0517f494fd6cfb7713cc28..06ee397e0c3a6172592e62dba95cd267cfff0db1 100644 --- a/fs/ceph/quota.c +++ b/fs/ceph/quota.c @@ -197,10 +197,10 @@ void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc) } /* - * This function walks through the snaprealm for an inode and returns the - * ceph_snap_realm for the first snaprealm that has quotas set (max_files, + * This function walks through the snaprealm for an inode and sets the + * realmp to the first snaprealm that has quotas set (max_files, * max_bytes, or any, depending on the 'which_quota' argument). If the root is - * reached, return the root ceph_snap_realm instead. + * reached, sets the realmp to the root ceph_snap_realm instead. * * Note that the caller is responsible for calling ceph_put_snap_realm() on the * returned realm. @@ -211,10 +211,9 @@ void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc) * this function will return -EAGAIN; otherwise, the snaprealms walk-through * will be restarted. 
*/ -static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc, - struct inode *inode, - enum quota_get_realm which_quota, - bool retry) +static int get_quota_realm(struct ceph_mds_client *mdsc, struct inode *inode, + enum quota_get_realm which_quota, + struct ceph_snap_realm **realmp, bool retry) { struct ceph_client *cl = mdsc->fsc->client; struct ceph_inode_info *ci = NULL; @@ -222,8 +221,10 @@ static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc, struct inode *in; bool has_quota; + if (realmp) + *realmp = NULL; if (ceph_snap(inode) != CEPH_NOSNAP) - return NULL; + return 0; restart: realm = ceph_inode(inode)->i_snap_realm; @@ -250,7 +251,7 @@ restart: break; ceph_put_snap_realm(mdsc, realm); if (!retry) - return ERR_PTR(-EAGAIN); + return -EAGAIN; goto restart; } @@ -259,8 +260,11 @@ restart: iput(in); next = realm->parent; - if (has_quota || !next) - return realm; + if (has_quota || !next) { + if (realmp) + *realmp = realm; + return 0; + } ceph_get_snap_realm(mdsc, next); ceph_put_snap_realm(mdsc, realm); @@ -269,7 +273,7 @@ restart: if (realm) ceph_put_snap_realm(mdsc, realm); - return NULL; + return 0; } bool ceph_quota_is_same_realm(struct inode *old, struct inode *new) @@ -277,6 +281,7 @@ bool ceph_quota_is_same_realm(struct inode *old, struct inode *new) struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(old->i_sb); struct ceph_snap_realm *old_realm, *new_realm; bool is_same; + int ret; restart: /* @@ -286,9 +291,9 @@ restart: * dropped and we can then restart the whole operation. 
*/ down_read(&mdsc->snap_rwsem); - old_realm = get_quota_realm(mdsc, old, QUOTA_GET_ANY, true); - new_realm = get_quota_realm(mdsc, new, QUOTA_GET_ANY, false); - if (PTR_ERR(new_realm) == -EAGAIN) { + get_quota_realm(mdsc, old, QUOTA_GET_ANY, &old_realm, true); + ret = get_quota_realm(mdsc, new, QUOTA_GET_ANY, &new_realm, false); + if (ret == -EAGAIN) { up_read(&mdsc->snap_rwsem); if (old_realm) ceph_put_snap_realm(mdsc, old_realm); @@ -492,8 +497,8 @@ bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf) bool is_updated = false; down_read(&mdsc->snap_rwsem); - realm = get_quota_realm(mdsc, d_inode(fsc->sb->s_root), - QUOTA_GET_MAX_BYTES, true); + get_quota_realm(mdsc, d_inode(fsc->sb->s_root), QUOTA_GET_MAX_BYTES, + &realm, true); up_read(&mdsc->snap_rwsem); if (!realm) return false; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index fe0f64a0acb27058014b188bec906e07310fad1f..b06e2bc86221bf02fe54b2aa3304be80bedc5214 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -3,6 +3,7 @@ #define _FS_CEPH_SUPER_H #include +#include #include #include @@ -1407,6 +1408,19 @@ static inline void __ceph_update_quota(struct ceph_inode_info *ci, ceph_adjust_quota_realms_count(&ci->netfs.inode, has_quota); } +static inline int __ceph_sparse_read_ext_count(struct inode *inode, u64 len) +{ + int cnt = 0; + + if (IS_ENCRYPTED(inode)) { + cnt = len >> CEPH_FSCRYPT_BLOCK_SHIFT; + if (cnt > CEPH_SPARSE_EXT_ARRAY_INITIAL) + cnt = 0; + } + + return cnt; +} + extern void ceph_handle_quota(struct ceph_mds_client *mdsc, struct ceph_mds_session *session, struct ceph_msg *msg); diff --git a/fs/erofs/Kconfig b/fs/erofs/Kconfig index 1d318f85232de9361714471ac973762ed2e6b0e6..fffd3919343e4553abdb1e6607c2eb4ef2bda011 100644 --- a/fs/erofs/Kconfig +++ b/fs/erofs/Kconfig @@ -114,8 +114,11 @@ config EROFS_FS_ZIP_DEFLATE config EROFS_FS_ONDEMAND bool "EROFS fscache-based on-demand read support" - depends on CACHEFILES_ONDEMAND && (EROFS_FS=m && FSCACHE || EROFS_FS=y && 
FSCACHE=y) - default n + depends on EROFS_FS + select NETFS_SUPPORT + select FSCACHE + select CACHEFILES + select CACHEFILES_ONDEMAND help This permits EROFS to use fscache-backed data blobs with on-demand read support. diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c index 1d65b9f60a39059c0e90ce04bbbe4ae5c69ef510..072ef6a66823ef351923f2c0514c9ddec50e5d8f 100644 --- a/fs/erofs/decompressor.c +++ b/fs/erofs/decompressor.c @@ -408,7 +408,7 @@ int z_erofs_parse_cfgs(struct super_block *sb, struct erofs_super_block *dsb) int size, ret = 0; if (!erofs_sb_has_compr_cfgs(sbi)) { - sbi->available_compr_algs = Z_EROFS_COMPRESSION_LZ4; + sbi->available_compr_algs = 1 << Z_EROFS_COMPRESSION_LZ4; return z_erofs_load_lz4_config(sb, dsb, NULL, 0); } diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c index 87ff35bff8d5bb3acb8dbc4d79c07d1d018cba56..bc12030393b24f26231fb363ac07e3150cd6babb 100644 --- a/fs/erofs/fscache.c +++ b/fs/erofs/fscache.c @@ -165,10 +165,10 @@ static int erofs_fscache_read_folios_async(struct fscache_cookie *cookie, static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio) { int ret; - struct erofs_fscache *ctx = folio_mapping(folio)->host->i_private; + struct erofs_fscache *ctx = folio->mapping->host->i_private; struct erofs_fscache_request *req; - req = erofs_fscache_req_alloc(folio_mapping(folio), + req = erofs_fscache_req_alloc(folio->mapping, folio_pos(folio), folio_size(folio)); if (IS_ERR(req)) { folio_unlock(folio); @@ -276,7 +276,7 @@ static int erofs_fscache_read_folio(struct file *file, struct folio *folio) struct erofs_fscache_request *req; int ret; - req = erofs_fscache_req_alloc(folio_mapping(folio), + req = erofs_fscache_req_alloc(folio->mapping, folio_pos(folio), folio_size(folio)); if (IS_ERR(req)) { folio_unlock(folio); diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index 9753875e41cb35a4e83468aafb885f71a4bb1547..e313c936351d51fb39685702437d22bbef16719a 100644 --- a/fs/erofs/zmap.c +++ 
b/fs/erofs/zmap.c @@ -454,7 +454,7 @@ static int z_erofs_do_map_blocks(struct inode *inode, .map = map, }; int err = 0; - unsigned int lclusterbits, endoff; + unsigned int lclusterbits, endoff, afmt; unsigned long initial_lcn; unsigned long long ofs, end; @@ -543,17 +543,20 @@ static int z_erofs_do_map_blocks(struct inode *inode, err = -EFSCORRUPTED; goto unmap_out; } - if (vi->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER) - map->m_algorithmformat = - Z_EROFS_COMPRESSION_INTERLACED; - else - map->m_algorithmformat = - Z_EROFS_COMPRESSION_SHIFTED; - } else if (m.headtype == Z_EROFS_LCLUSTER_TYPE_HEAD2) { - map->m_algorithmformat = vi->z_algorithmtype[1]; + afmt = vi->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER ? + Z_EROFS_COMPRESSION_INTERLACED : + Z_EROFS_COMPRESSION_SHIFTED; } else { - map->m_algorithmformat = vi->z_algorithmtype[0]; + afmt = m.headtype == Z_EROFS_LCLUSTER_TYPE_HEAD2 ? + vi->z_algorithmtype[1] : vi->z_algorithmtype[0]; + if (!(EROFS_I_SB(inode)->available_compr_algs & (1 << afmt))) { + erofs_err(inode->i_sb, "inconsistent algorithmtype %u for nid %llu", + afmt, vi->nid); + err = -EFSCORRUPTED; + goto unmap_out; + } } + map->m_algorithmformat = afmt; if ((flags & EROFS_GET_BLOCKS_FIEMAP) || ((flags & EROFS_GET_BLOCKS_READMORE) && diff --git a/fs/exec.c b/fs/exec.c index 73e4045df271d148377340a40c77271ee36b2161..af4fbb61cd53e97c788387a0d8277d1ce5495d7d 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -128,7 +128,7 @@ SYSCALL_DEFINE1(uselib, const char __user *, library) struct filename *tmp = getname(library); int error = PTR_ERR(tmp); static const struct open_flags uselib_flags = { - .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC, + .open_flag = O_LARGEFILE | O_RDONLY, .acc_mode = MAY_READ | MAY_EXEC, .intent = LOOKUP_OPEN, .lookup_flags = LOOKUP_FOLLOW, @@ -904,6 +904,10 @@ EXPORT_SYMBOL(transfer_args_to_stack); #endif /* CONFIG_MMU */ +/* + * On success, caller must call do_close_execat() on the returned + * struct file to close it. 
+ */ static struct file *do_open_execat(int fd, struct filename *name, int flags) { struct file *file; @@ -948,6 +952,17 @@ exit: return ERR_PTR(err); } +/** + * open_exec - Open a path name for execution + * + * @name: path name to open with the intent of executing it. + * + * Returns ERR_PTR on failure or allocated struct file on success. + * + * As this is a wrapper for the internal do_open_execat(), callers + * must call allow_write_access() before fput() on release. Also see + * do_close_execat(). + */ struct file *open_exec(const char *name) { struct filename *filename = getname_kernel(name); @@ -1409,6 +1424,9 @@ int begin_new_exec(struct linux_binprm * bprm) out_unlock: up_write(&me->signal->exec_update_lock); + if (!bprm->cred) + mutex_unlock(&me->signal->cred_guard_mutex); + out: return retval; } @@ -1484,6 +1502,15 @@ static int prepare_bprm_creds(struct linux_binprm *bprm) return -ENOMEM; } +/* Matches do_open_execat() */ +static void do_close_execat(struct file *file) +{ + if (!file) + return; + allow_write_access(file); + fput(file); +} + static void free_bprm(struct linux_binprm *bprm) { if (bprm->mm) { @@ -1495,10 +1522,7 @@ static void free_bprm(struct linux_binprm *bprm) mutex_unlock(¤t->signal->cred_guard_mutex); abort_creds(bprm->cred); } - if (bprm->file) { - allow_write_access(bprm->file); - fput(bprm->file); - } + do_close_execat(bprm->file); if (bprm->executable) fput(bprm->executable); /* If a binfmt changed the interp, free it. 
*/ @@ -1508,12 +1532,23 @@ static void free_bprm(struct linux_binprm *bprm) kfree(bprm); } -static struct linux_binprm *alloc_bprm(int fd, struct filename *filename) +static struct linux_binprm *alloc_bprm(int fd, struct filename *filename, int flags) { - struct linux_binprm *bprm = kzalloc(sizeof(*bprm), GFP_KERNEL); + struct linux_binprm *bprm; + struct file *file; int retval = -ENOMEM; - if (!bprm) - goto out; + + file = do_open_execat(fd, filename, flags); + if (IS_ERR(file)) + return ERR_CAST(file); + + bprm = kzalloc(sizeof(*bprm), GFP_KERNEL); + if (!bprm) { + do_close_execat(file); + return ERR_PTR(-ENOMEM); + } + + bprm->file = file; if (fd == AT_FDCWD || filename->name[0] == '/') { bprm->filename = filename->name; @@ -1526,18 +1561,28 @@ static struct linux_binprm *alloc_bprm(int fd, struct filename *filename) if (!bprm->fdpath) goto out_free; + /* + * Record that a name derived from an O_CLOEXEC fd will be + * inaccessible after exec. This allows the code in exec to + * choose to fail when the executable is not mmaped into the + * interpreter and an open file descriptor is not passed to + * the interpreter. This makes for a better user experience + * than having the interpreter start and then immediately fail + * when it finds the executable is inaccessible. + */ + if (get_close_on_exec(fd)) + bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE; + bprm->filename = bprm->fdpath; } bprm->interp = bprm->filename; retval = bprm_mm_init(bprm); - if (retval) - goto out_free; - return bprm; + if (!retval) + return bprm; out_free: free_bprm(bprm); -out: return ERR_PTR(retval); } @@ -1588,6 +1633,7 @@ static void check_unsafe_exec(struct linux_binprm *bprm) } rcu_read_unlock(); + /* "users" and "in_exec" locked for copy_fs() */ if (p->fs->users > n_fs) bprm->unsafe |= LSM_UNSAFE_SHARE; else @@ -1804,13 +1850,8 @@ static int exec_binprm(struct linux_binprm *bprm) return 0; } -/* - * sys_execve() executes a new program. 
- */ -static int bprm_execve(struct linux_binprm *bprm, - int fd, struct filename *filename, int flags) +static int bprm_execve(struct linux_binprm *bprm) { - struct file *file; int retval; retval = prepare_bprm_creds(bprm); @@ -1826,26 +1867,8 @@ static int bprm_execve(struct linux_binprm *bprm, current->in_execve = 1; sched_mm_cid_before_execve(current); - file = do_open_execat(fd, filename, flags); - retval = PTR_ERR(file); - if (IS_ERR(file)) - goto out_unmark; - sched_exec(); - bprm->file = file; - /* - * Record that a name derived from an O_CLOEXEC fd will be - * inaccessible after exec. This allows the code in exec to - * choose to fail when the executable is not mmaped into the - * interpreter and an open file descriptor is not passed to - * the interpreter. This makes for a better user experience - * than having the interpreter start and then immediately fail - * when it finds the executable is inaccessible. - */ - if (bprm->fdpath && get_close_on_exec(fd)) - bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE; - /* Set the unchanging part of bprm->cred */ retval = security_bprm_creds_for_exec(bprm); if (retval) @@ -1875,7 +1898,6 @@ out: if (bprm->point_of_no_return && !fatal_signal_pending(current)) force_fatal_sig(SIGSEGV); -out_unmark: sched_mm_cid_after_execve(current); current->fs->in_exec = 0; current->in_execve = 0; @@ -1910,7 +1932,7 @@ static int do_execveat_common(int fd, struct filename *filename, * further execve() calls fail. 
*/ current->flags &= ~PF_NPROC_EXCEEDED; - bprm = alloc_bprm(fd, filename); + bprm = alloc_bprm(fd, filename, flags); if (IS_ERR(bprm)) { retval = PTR_ERR(bprm); goto out_ret; @@ -1959,7 +1981,7 @@ static int do_execveat_common(int fd, struct filename *filename, bprm->argc = 1; } - retval = bprm_execve(bprm, fd, filename, flags); + retval = bprm_execve(bprm); out_free: free_bprm(bprm); @@ -1984,7 +2006,7 @@ int kernel_execve(const char *kernel_filename, if (IS_ERR(filename)) return PTR_ERR(filename); - bprm = alloc_bprm(fd, filename); + bprm = alloc_bprm(fd, filename, 0); if (IS_ERR(bprm)) { retval = PTR_ERR(bprm); goto out_ret; @@ -2019,7 +2041,7 @@ int kernel_execve(const char *kernel_filename, if (retval < 0) goto out_free; - retval = bprm_execve(bprm, fd, filename, 0); + retval = bprm_execve(bprm); out_free: free_bprm(bprm); out_ret: diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 1767493dffda73b77fe3c394967c003a426fb13d..3d84fcc471c6000e38625e2652121282e4bec3c0 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1675,11 +1675,11 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) inode->i_state |= I_DIRTY_PAGES; - else if (unlikely(inode->i_state & I_PINNING_FSCACHE_WB)) { + else if (unlikely(inode->i_state & I_PINNING_NETFS_WB)) { if (!(inode->i_state & I_DIRTY_PAGES)) { - inode->i_state &= ~I_PINNING_FSCACHE_WB; - wbc->unpinned_fscache_wb = true; - dirty |= I_PINNING_FSCACHE_WB; /* Cause write_inode */ + inode->i_state &= ~I_PINNING_NETFS_WB; + wbc->unpinned_netfs_wb = true; + dirty |= I_PINNING_NETFS_WB; /* Cause write_inode */ } } @@ -1691,7 +1691,7 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) if (ret == 0) ret = err; } - wbc->unpinned_fscache_wb = false; + wbc->unpinned_netfs_wb = false; trace_writeback_single_inode(inode, wbc, nr_to_write); return ret; } diff --git a/fs/fscache/Kconfig b/fs/fscache/Kconfig deleted file mode 
100644 index b313a978ae0a25cfbf44a866f178b2ecacb96aaa..0000000000000000000000000000000000000000 --- a/fs/fscache/Kconfig +++ /dev/null @@ -1,40 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only - -config FSCACHE - tristate "General filesystem local caching manager" - select NETFS_SUPPORT - help - This option enables a generic filesystem caching manager that can be - used by various network and other filesystems to cache data locally. - Different sorts of caches can be plugged in, depending on the - resources available. - - See Documentation/filesystems/caching/fscache.rst for more information. - -config FSCACHE_STATS - bool "Gather statistical information on local caching" - depends on FSCACHE && PROC_FS - select NETFS_STATS - help - This option causes statistical information to be gathered on local - caching and exported through file: - - /proc/fs/fscache/stats - - The gathering of statistics adds a certain amount of overhead to - execution as there are a quite a few stats gathered, and on a - multi-CPU system these may be on cachelines that keep bouncing - between CPUs. On the other hand, the stats are very useful for - debugging purposes. Saying 'Y' here is recommended. - - See Documentation/filesystems/caching/fscache.rst for more information. - -config FSCACHE_DEBUG - bool "Debug FS-Cache" - depends on FSCACHE - help - This permits debugging to be dynamically enabled in the local caching - management module. If this is set, the debugging output may be - enabled by setting bits in /sys/modules/fscache/parameter/debug. - - See Documentation/filesystems/caching/fscache.rst for more information. 
diff --git a/fs/fscache/Makefile b/fs/fscache/Makefile deleted file mode 100644 index afb090ea16c40ca8d27839823a60a92a1748ce16..0000000000000000000000000000000000000000 --- a/fs/fscache/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# -# Makefile for general filesystem caching code -# - -fscache-y := \ - cache.o \ - cookie.o \ - io.o \ - main.o \ - volume.o - -fscache-$(CONFIG_PROC_FS) += proc.o -fscache-$(CONFIG_FSCACHE_STATS) += stats.o - -obj-$(CONFIG_FSCACHE) := fscache.o diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h deleted file mode 100644 index 1336f517e9b1a60a41281f2f9e21d3e9dddcd025..0000000000000000000000000000000000000000 --- a/fs/fscache/internal.h +++ /dev/null @@ -1,277 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* Internal definitions for FS-Cache - * - * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - */ - -#ifdef pr_fmt -#undef pr_fmt -#endif - -#define pr_fmt(fmt) "FS-Cache: " fmt - -#include -#include -#include -#include -#include - -/* - * cache.c - */ -#ifdef CONFIG_PROC_FS -extern const struct seq_operations fscache_caches_seq_ops; -#endif -bool fscache_begin_cache_access(struct fscache_cache *cache, enum fscache_access_trace why); -void fscache_end_cache_access(struct fscache_cache *cache, enum fscache_access_trace why); -struct fscache_cache *fscache_lookup_cache(const char *name, bool is_cache); -void fscache_put_cache(struct fscache_cache *cache, enum fscache_cache_trace where); - -static inline enum fscache_cache_state fscache_cache_state(const struct fscache_cache *cache) -{ - return smp_load_acquire(&cache->state); -} - -static inline bool fscache_cache_is_live(const struct fscache_cache *cache) -{ - return fscache_cache_state(cache) == FSCACHE_CACHE_IS_ACTIVE; -} - -static inline void fscache_set_cache_state(struct fscache_cache *cache, - enum fscache_cache_state new_state) -{ - smp_store_release(&cache->state, 
new_state); - -} - -static inline bool fscache_set_cache_state_maybe(struct fscache_cache *cache, - enum fscache_cache_state old_state, - enum fscache_cache_state new_state) -{ - return try_cmpxchg_release(&cache->state, &old_state, new_state); -} - -/* - * cookie.c - */ -extern struct kmem_cache *fscache_cookie_jar; -#ifdef CONFIG_PROC_FS -extern const struct seq_operations fscache_cookies_seq_ops; -#endif -extern struct timer_list fscache_cookie_lru_timer; - -extern void fscache_print_cookie(struct fscache_cookie *cookie, char prefix); -extern bool fscache_begin_cookie_access(struct fscache_cookie *cookie, - enum fscache_access_trace why); - -static inline void fscache_see_cookie(struct fscache_cookie *cookie, - enum fscache_cookie_trace where) -{ - trace_fscache_cookie(cookie->debug_id, refcount_read(&cookie->ref), - where); -} - -/* - * main.c - */ -extern unsigned fscache_debug; - -extern unsigned int fscache_hash(unsigned int salt, const void *data, size_t len); - -/* - * proc.c - */ -#ifdef CONFIG_PROC_FS -extern int __init fscache_proc_init(void); -extern void fscache_proc_cleanup(void); -#else -#define fscache_proc_init() (0) -#define fscache_proc_cleanup() do {} while (0) -#endif - -/* - * stats.c - */ -#ifdef CONFIG_FSCACHE_STATS -extern atomic_t fscache_n_volumes; -extern atomic_t fscache_n_volumes_collision; -extern atomic_t fscache_n_volumes_nomem; -extern atomic_t fscache_n_cookies; -extern atomic_t fscache_n_cookies_lru; -extern atomic_t fscache_n_cookies_lru_expired; -extern atomic_t fscache_n_cookies_lru_removed; -extern atomic_t fscache_n_cookies_lru_dropped; - -extern atomic_t fscache_n_acquires; -extern atomic_t fscache_n_acquires_ok; -extern atomic_t fscache_n_acquires_oom; - -extern atomic_t fscache_n_invalidates; - -extern atomic_t fscache_n_relinquishes; -extern atomic_t fscache_n_relinquishes_retire; -extern atomic_t fscache_n_relinquishes_dropped; - -extern atomic_t fscache_n_resizes; -extern atomic_t fscache_n_resizes_null; - -static 
inline void fscache_stat(atomic_t *stat) -{ - atomic_inc(stat); -} - -static inline void fscache_stat_d(atomic_t *stat) -{ - atomic_dec(stat); -} - -#define __fscache_stat(stat) (stat) - -int fscache_stats_show(struct seq_file *m, void *v); -#else - -#define __fscache_stat(stat) (NULL) -#define fscache_stat(stat) do {} while (0) -#define fscache_stat_d(stat) do {} while (0) -#endif - -/* - * volume.c - */ -#ifdef CONFIG_PROC_FS -extern const struct seq_operations fscache_volumes_seq_ops; -#endif - -struct fscache_volume *fscache_get_volume(struct fscache_volume *volume, - enum fscache_volume_trace where); -void fscache_put_volume(struct fscache_volume *volume, - enum fscache_volume_trace where); -bool fscache_begin_volume_access(struct fscache_volume *volume, - struct fscache_cookie *cookie, - enum fscache_access_trace why); -void fscache_create_volume(struct fscache_volume *volume, bool wait); - - -/*****************************************************************************/ -/* - * debug tracing - */ -#define dbgprintk(FMT, ...) \ - printk("[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__) - -#define kenter(FMT, ...) dbgprintk("==> %s("FMT")", __func__, ##__VA_ARGS__) -#define kleave(FMT, ...) dbgprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__) -#define kdebug(FMT, ...) dbgprintk(FMT, ##__VA_ARGS__) - -#define kjournal(FMT, ...) no_printk(FMT, ##__VA_ARGS__) - -#ifdef __KDEBUG -#define _enter(FMT, ...) kenter(FMT, ##__VA_ARGS__) -#define _leave(FMT, ...) kleave(FMT, ##__VA_ARGS__) -#define _debug(FMT, ...) kdebug(FMT, ##__VA_ARGS__) - -#elif defined(CONFIG_FSCACHE_DEBUG) -#define _enter(FMT, ...) \ -do { \ - if (__do_kdebug(ENTER)) \ - kenter(FMT, ##__VA_ARGS__); \ -} while (0) - -#define _leave(FMT, ...) \ -do { \ - if (__do_kdebug(LEAVE)) \ - kleave(FMT, ##__VA_ARGS__); \ -} while (0) - -#define _debug(FMT, ...) \ -do { \ - if (__do_kdebug(DEBUG)) \ - kdebug(FMT, ##__VA_ARGS__); \ -} while (0) - -#else -#define _enter(FMT, ...) 
no_printk("==> %s("FMT")", __func__, ##__VA_ARGS__) -#define _leave(FMT, ...) no_printk("<== %s()"FMT"", __func__, ##__VA_ARGS__) -#define _debug(FMT, ...) no_printk(FMT, ##__VA_ARGS__) -#endif - -/* - * determine whether a particular optional debugging point should be logged - * - we need to go through three steps to persuade cpp to correctly join the - * shorthand in FSCACHE_DEBUG_LEVEL with its prefix - */ -#define ____do_kdebug(LEVEL, POINT) \ - unlikely((fscache_debug & \ - (FSCACHE_POINT_##POINT << (FSCACHE_DEBUG_ ## LEVEL * 3)))) -#define ___do_kdebug(LEVEL, POINT) \ - ____do_kdebug(LEVEL, POINT) -#define __do_kdebug(POINT) \ - ___do_kdebug(FSCACHE_DEBUG_LEVEL, POINT) - -#define FSCACHE_DEBUG_CACHE 0 -#define FSCACHE_DEBUG_COOKIE 1 -#define FSCACHE_DEBUG_OBJECT 2 -#define FSCACHE_DEBUG_OPERATION 3 - -#define FSCACHE_POINT_ENTER 1 -#define FSCACHE_POINT_LEAVE 2 -#define FSCACHE_POINT_DEBUG 4 - -#ifndef FSCACHE_DEBUG_LEVEL -#define FSCACHE_DEBUG_LEVEL CACHE -#endif - -/* - * assertions - */ -#if 1 /* defined(__KDEBUGALL) */ - -#define ASSERT(X) \ -do { \ - if (unlikely(!(X))) { \ - pr_err("\n"); \ - pr_err("Assertion failed\n"); \ - BUG(); \ - } \ -} while (0) - -#define ASSERTCMP(X, OP, Y) \ -do { \ - if (unlikely(!((X) OP (Y)))) { \ - pr_err("\n"); \ - pr_err("Assertion failed\n"); \ - pr_err("%lx " #OP " %lx is false\n", \ - (unsigned long)(X), (unsigned long)(Y)); \ - BUG(); \ - } \ -} while (0) - -#define ASSERTIF(C, X) \ -do { \ - if (unlikely((C) && !(X))) { \ - pr_err("\n"); \ - pr_err("Assertion failed\n"); \ - BUG(); \ - } \ -} while (0) - -#define ASSERTIFCMP(C, X, OP, Y) \ -do { \ - if (unlikely((C) && !((X) OP (Y)))) { \ - pr_err("\n"); \ - pr_err("Assertion failed\n"); \ - pr_err("%lx " #OP " %lx is false\n", \ - (unsigned long)(X), (unsigned long)(Y)); \ - BUG(); \ - } \ -} while (0) - -#else - -#define ASSERT(X) do {} while (0) -#define ASSERTCMP(X, OP, Y) do {} while (0) -#define ASSERTIF(C, X) do {} while (0) -#define ASSERTIFCMP(C, X, OP, Y) 
do {} while (0) - -#endif /* assert or not */ diff --git a/fs/netfs/Kconfig b/fs/netfs/Kconfig index b4db21022cb43f3b371e31687a89ec309f5b3726..bec805e0c44c072190394283a598d27ee095d0b7 100644 --- a/fs/netfs/Kconfig +++ b/fs/netfs/Kconfig @@ -21,3 +21,42 @@ config NETFS_STATS multi-CPU system these may be on cachelines that keep bouncing between CPUs. On the other hand, the stats are very useful for debugging purposes. Saying 'Y' here is recommended. + +config FSCACHE + bool "General filesystem local caching manager" + depends on NETFS_SUPPORT + help + This option enables a generic filesystem caching manager that can be + used by various network and other filesystems to cache data locally. + Different sorts of caches can be plugged in, depending on the + resources available. + + See Documentation/filesystems/caching/fscache.rst for more information. + +config FSCACHE_STATS + bool "Gather statistical information on local caching" + depends on FSCACHE && PROC_FS + select NETFS_STATS + help + This option causes statistical information to be gathered on local + caching and exported through the file: + + /proc/fs/fscache/stats + + The gathering of statistics adds a certain amount of overhead to + execution as there are quite a few stats gathered, and on a + multi-CPU system these may be on cachelines that keep bouncing + between CPUs. On the other hand, the stats are very useful for + debugging purposes. Saying 'Y' here is recommended. + + See Documentation/filesystems/caching/fscache.rst for more information. + +config FSCACHE_DEBUG + bool "Debug FS-Cache" + depends on FSCACHE + help + This permits debugging to be dynamically enabled in the local caching + management module. If this is set, the debugging output may be + enabled by setting bits in /sys/modules/fscache/parameter/debug. + + See Documentation/filesystems/caching/fscache.rst for more information. 
diff --git a/fs/netfs/Makefile b/fs/netfs/Makefile index 386d6fb92793a5d1f4f247e9a1440bea7a1eb59d..d4d1d799819ec4c92807449aebeb38e744d43991 100644 --- a/fs/netfs/Makefile +++ b/fs/netfs/Makefile @@ -2,11 +2,29 @@ netfs-y := \ buffered_read.o \ + buffered_write.o \ + direct_read.o \ + direct_write.o \ io.o \ iterator.o \ + locking.o \ main.o \ - objects.o + misc.o \ + objects.o \ + output.o netfs-$(CONFIG_NETFS_STATS) += stats.o -obj-$(CONFIG_NETFS_SUPPORT) := netfs.o +netfs-$(CONFIG_FSCACHE) += \ + fscache_cache.o \ + fscache_cookie.o \ + fscache_io.o \ + fscache_main.o \ + fscache_volume.o + +ifeq ($(CONFIG_PROC_FS),y) +netfs-$(CONFIG_FSCACHE) += fscache_proc.o +endif +netfs-$(CONFIG_FSCACHE_STATS) += fscache_stats.o + +obj-$(CONFIG_NETFS_SUPPORT) += netfs.o diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c index 2cd3ccf4c439960053e436d63792bc5bf7a914de..a59e7b2edaacdcb251765793f14e87eb93a60bb3 100644 --- a/fs/netfs/buffered_read.c +++ b/fs/netfs/buffered_read.c @@ -16,6 +16,7 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) { struct netfs_io_subrequest *subreq; + struct netfs_folio *finfo; struct folio *folio; pgoff_t start_page = rreq->start / PAGE_SIZE; pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1; @@ -63,6 +64,7 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) break; } if (!folio_started && test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) { + trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache); folio_start_fscache(folio); folio_started = true; } @@ -86,6 +88,15 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) if (!pg_failed) { flush_dcache_folio(folio); + finfo = netfs_folio_info(folio); + if (finfo) { + trace_netfs_folio(folio, netfs_folio_trace_filled_gaps); + if (finfo->netfs_group) + folio_change_private(folio, finfo->netfs_group); + else + folio_detach_private(folio); + kfree(finfo); + } folio_mark_uptodate(folio); } @@ -147,6 +158,15 @@ static void 
netfs_rreq_expand(struct netfs_io_request *rreq, } } +/* + * Begin an operation, and fetch the stored zero point value from the cookie if + * available. + */ +static int netfs_begin_cache_read(struct netfs_io_request *rreq, struct netfs_inode *ctx) +{ + return fscache_begin_read_operation(&rreq->cache_resources, netfs_i_cookie(ctx)); +} + /** * netfs_readahead - Helper to manage a read request * @ractl: The description of the readahead request @@ -180,11 +200,9 @@ void netfs_readahead(struct readahead_control *ractl) if (IS_ERR(rreq)) return; - if (ctx->ops->begin_cache_operation) { - ret = ctx->ops->begin_cache_operation(rreq); - if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) - goto cleanup_free; - } + ret = netfs_begin_cache_read(rreq, ctx); + if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) + goto cleanup_free; netfs_stat(&netfs_n_rh_readahead); trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl), @@ -192,6 +210,10 @@ void netfs_readahead(struct readahead_control *ractl) netfs_rreq_expand(rreq, ractl); + /* Set up the output buffer */ + iov_iter_xarray(&rreq->iter, ITER_DEST, &ractl->mapping->i_pages, + rreq->start, rreq->len); + /* Drop the refs on the folios here rather than in the cache or * filesystem. The locks will be dropped in netfs_rreq_unlock(). 
*/ @@ -199,6 +221,7 @@ void netfs_readahead(struct readahead_control *ractl) ; netfs_begin_read(rreq, false); + netfs_put_request(rreq, false, netfs_rreq_trace_put_return); return; cleanup_free: @@ -226,6 +249,7 @@ int netfs_read_folio(struct file *file, struct folio *folio) struct address_space *mapping = folio_file_mapping(folio); struct netfs_io_request *rreq; struct netfs_inode *ctx = netfs_inode(mapping->host); + struct folio *sink = NULL; int ret; _enter("%lx", folio_index(folio)); @@ -238,15 +262,64 @@ int netfs_read_folio(struct file *file, struct folio *folio) goto alloc_error; } - if (ctx->ops->begin_cache_operation) { - ret = ctx->ops->begin_cache_operation(rreq); - if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) - goto discard; - } + ret = netfs_begin_cache_read(rreq, ctx); + if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) + goto discard; netfs_stat(&netfs_n_rh_readpage); trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage); - return netfs_begin_read(rreq, true); + + /* Set up the output buffer */ + if (folio_test_dirty(folio)) { + /* Handle someone trying to read from an unflushed streaming + * write. We fiddle the buffer so that a gap at the beginning + * and/or a gap at the end get copied to, but the middle is + * discarded. 
+ */ + struct netfs_folio *finfo = netfs_folio_info(folio); + struct bio_vec *bvec; + unsigned int from = finfo->dirty_offset; + unsigned int to = from + finfo->dirty_len; + unsigned int off = 0, i = 0; + size_t flen = folio_size(folio); + size_t nr_bvec = flen / PAGE_SIZE + 2; + size_t part; + + ret = -ENOMEM; + bvec = kmalloc_array(nr_bvec, sizeof(*bvec), GFP_KERNEL); + if (!bvec) + goto discard; + + sink = folio_alloc(GFP_KERNEL, 0); + if (!sink) + goto discard; + + trace_netfs_folio(folio, netfs_folio_trace_read_gaps); + + rreq->direct_bv = bvec; + rreq->direct_bv_count = nr_bvec; + if (from > 0) { + bvec_set_folio(&bvec[i++], folio, from, 0); + off = from; + } + while (off < to) { + part = min_t(size_t, to - off, PAGE_SIZE); + bvec_set_folio(&bvec[i++], sink, part, 0); + off += part; + } + if (to < flen) + bvec_set_folio(&bvec[i++], folio, flen - to, to); + iov_iter_bvec(&rreq->iter, ITER_DEST, bvec, i, rreq->len); + } else { + iov_iter_xarray(&rreq->iter, ITER_DEST, &mapping->i_pages, + rreq->start, rreq->len); + } + + ret = netfs_begin_read(rreq, true); + if (sink) + folio_put(sink); + netfs_put_request(rreq, false, netfs_rreq_trace_put_return); + return ret < 0 ? 
ret : 0; discard: netfs_put_request(rreq, false, netfs_rreq_trace_put_discard); @@ -390,11 +463,9 @@ retry: rreq->no_unlock_folio = folio_index(folio); __set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags); - if (ctx->ops->begin_cache_operation) { - ret = ctx->ops->begin_cache_operation(rreq); - if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) - goto error_put; - } + ret = netfs_begin_cache_read(rreq, ctx); + if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) + goto error_put; netfs_stat(&netfs_n_rh_write_begin); trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin); @@ -405,6 +476,10 @@ retry: ractl._nr_pages = folio_nr_pages(folio); netfs_rreq_expand(rreq, &ractl); + /* Set up the output buffer */ + iov_iter_xarray(&rreq->iter, ITER_DEST, &mapping->i_pages, + rreq->start, rreq->len); + /* We hold the folio locks, so we can drop the references */ folio_get(folio); while (readahead_folio(&ractl)) @@ -413,6 +488,7 @@ retry: ret = netfs_begin_read(rreq, true); if (ret < 0) goto error; + netfs_put_request(rreq, false, netfs_rreq_trace_put_return); have_folio: ret = folio_wait_fscache_killable(folio); @@ -434,3 +510,124 @@ error: return ret; } EXPORT_SYMBOL(netfs_write_begin); + +/* + * Preload the data into a page we're proposing to write into. 
+ */ +int netfs_prefetch_for_write(struct file *file, struct folio *folio, + size_t offset, size_t len) +{ + struct netfs_io_request *rreq; + struct address_space *mapping = folio_file_mapping(folio); + struct netfs_inode *ctx = netfs_inode(mapping->host); + unsigned long long start = folio_pos(folio); + size_t flen = folio_size(folio); + int ret; + + _enter("%zx @%llx", flen, start); + + ret = -ENOMEM; + + rreq = netfs_alloc_request(mapping, file, start, flen, + NETFS_READ_FOR_WRITE); + if (IS_ERR(rreq)) { + ret = PTR_ERR(rreq); + goto error; + } + + rreq->no_unlock_folio = folio_index(folio); + __set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags); + ret = netfs_begin_cache_read(rreq, ctx); + if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) + goto error_put; + + netfs_stat(&netfs_n_rh_write_begin); + trace_netfs_read(rreq, start, flen, netfs_read_trace_prefetch_for_write); + + /* Set up the output buffer */ + iov_iter_xarray(&rreq->iter, ITER_DEST, &mapping->i_pages, + rreq->start, rreq->len); + + ret = netfs_begin_read(rreq, true); + netfs_put_request(rreq, false, netfs_rreq_trace_put_return); + return ret; + +error_put: + netfs_put_request(rreq, false, netfs_rreq_trace_put_discard); +error: + _leave(" = %d", ret); + return ret; +} + +/** + * netfs_buffered_read_iter - Filesystem buffered I/O read routine + * @iocb: kernel I/O control block + * @iter: destination for the data read + * + * This is the ->read_iter() routine for all filesystems that can use the page + * cache directly. + * + * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall be + * returned when no data can be read without waiting for I/O requests to + * complete; it doesn't prevent readahead. + * + * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O requests + * shall be made for the read or for readahead. When no data can be read, + * -EAGAIN shall be returned. When readahead would be triggered, a partial, + * possibly empty read shall be returned. 
+ * + * Return: + * * number of bytes copied, even for partial reads + * * negative error code (or 0 if IOCB_NOIO) if nothing was read + */ +ssize_t netfs_buffered_read_iter(struct kiocb *iocb, struct iov_iter *iter) +{ + struct inode *inode = file_inode(iocb->ki_filp); + struct netfs_inode *ictx = netfs_inode(inode); + ssize_t ret; + + if (WARN_ON_ONCE((iocb->ki_flags & IOCB_DIRECT) || + test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags))) + return -EINVAL; + + ret = netfs_start_io_read(inode); + if (ret == 0) { + ret = filemap_read(iocb, iter, 0); + netfs_end_io_read(inode); + } + return ret; +} +EXPORT_SYMBOL(netfs_buffered_read_iter); + +/** + * netfs_file_read_iter - Generic filesystem read routine + * @iocb: kernel I/O control block + * @iter: destination for the data read + * + * This is the ->read_iter() routine for all filesystems that can use the page + * cache directly. + * + * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall be + * returned when no data can be read without waiting for I/O requests to + * complete; it doesn't prevent readahead. + * + * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O requests + * shall be made for the read or for readahead. When no data can be read, + * -EAGAIN shall be returned. When readahead would be triggered, a partial, + * possibly empty read shall be returned. 
+ * + * Return: + * * number of bytes copied, even for partial reads + * * negative error code (or 0 if IOCB_NOIO) if nothing was read + */ +ssize_t netfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) +{ + struct netfs_inode *ictx = netfs_inode(iocb->ki_filp->f_mapping->host); + + if ((iocb->ki_flags & IOCB_DIRECT) || + test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags)) + return netfs_unbuffered_read_iter(iocb, iter); + + return netfs_buffered_read_iter(iocb, iter); +} +EXPORT_SYMBOL(netfs_file_read_iter); diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c new file mode 100644 index 0000000000000000000000000000000000000000..93dc76f34e39a077a82d235fe5ec69bbc5d6e13d --- /dev/null +++ b/fs/netfs/buffered_write.c @@ -0,0 +1,1253 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Network filesystem high-level write support. + * + * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include +#include +#include +#include +#include +#include +#include "internal.h" + +/* + * Determined write method. Adjust netfs_folio_traces if this is changed. + */ +enum netfs_how_to_modify { + NETFS_FOLIO_IS_UPTODATE, /* Folio is uptodate already */ + NETFS_JUST_PREFETCH, /* We have to read the folio anyway */ + NETFS_WHOLE_FOLIO_MODIFY, /* We're going to overwrite the whole folio */ + NETFS_MODIFY_AND_CLEAR, /* We can assume there is no data to be downloaded. */ + NETFS_STREAMING_WRITE, /* Store incomplete data in non-uptodate page. */ + NETFS_STREAMING_WRITE_CONT, /* Continue streaming write. */ + NETFS_FLUSH_CONTENT, /* Flush incompatible content. 
*/ +}; + +static void netfs_cleanup_buffered_write(struct netfs_io_request *wreq); + +static void netfs_set_group(struct folio *folio, struct netfs_group *netfs_group) +{ + if (netfs_group && !folio_get_private(folio)) + folio_attach_private(folio, netfs_get_group(netfs_group)); +} + +#if IS_ENABLED(CONFIG_FSCACHE) +static void netfs_folio_start_fscache(bool caching, struct folio *folio) +{ + if (caching) + folio_start_fscache(folio); +} +#else +static void netfs_folio_start_fscache(bool caching, struct folio *folio) +{ +} +#endif + +/* + * Decide how we should modify a folio. We might be attempting to do + * write-streaming, in which case we don't want to do a local RMW cycle if we can + * avoid it. If we're doing local caching or content crypto, we award that + * priority over avoiding RMW. If the file is open readably, then we also + * assume that we may want to read what we wrote. + */ +static enum netfs_how_to_modify netfs_how_to_modify(struct netfs_inode *ctx, + struct file *file, + struct folio *folio, + void *netfs_group, + size_t flen, + size_t offset, + size_t len, + bool maybe_trouble) +{ + struct netfs_folio *finfo = netfs_folio_info(folio); + loff_t pos = folio_file_pos(folio); + + _enter(""); + + if (netfs_folio_group(folio) != netfs_group) + return NETFS_FLUSH_CONTENT; + + if (folio_test_uptodate(folio)) + return NETFS_FOLIO_IS_UPTODATE; + + if (pos >= ctx->zero_point) + return NETFS_MODIFY_AND_CLEAR; + + if (!maybe_trouble && offset == 0 && len >= flen) + return NETFS_WHOLE_FOLIO_MODIFY; + + if (file->f_mode & FMODE_READ) + goto no_write_streaming; + if (test_bit(NETFS_ICTX_NO_WRITE_STREAMING, &ctx->flags)) + goto no_write_streaming; + + if (netfs_is_cache_enabled(ctx)) { + /* We don't want to get a streaming write on a file that loses + * caching service temporarily because the backing store got + * culled. 
+ */ + if (!test_bit(NETFS_ICTX_NO_WRITE_STREAMING, &ctx->flags)) + set_bit(NETFS_ICTX_NO_WRITE_STREAMING, &ctx->flags); + goto no_write_streaming; + } + + if (!finfo) + return NETFS_STREAMING_WRITE; + + /* We can continue a streaming write only if it continues on from the + * previous. If it overlaps, we must flush lest we suffer a partial + * copy and disjoint dirty regions. + */ + if (offset == finfo->dirty_offset + finfo->dirty_len) + return NETFS_STREAMING_WRITE_CONT; + return NETFS_FLUSH_CONTENT; + +no_write_streaming: + if (finfo) { + netfs_stat(&netfs_n_wh_wstream_conflict); + return NETFS_FLUSH_CONTENT; + } + return NETFS_JUST_PREFETCH; +} + +/* + * Grab a folio for writing and lock it. Attempt to allocate as large a folio + * as possible to hold as much of the remaining length as possible in one go. + */ +static struct folio *netfs_grab_folio_for_write(struct address_space *mapping, + loff_t pos, size_t part) +{ + pgoff_t index = pos / PAGE_SIZE; + fgf_t fgp_flags = FGP_WRITEBEGIN; + + if (mapping_large_folio_support(mapping)) + fgp_flags |= fgf_set_order(pos % PAGE_SIZE + part); + + return __filemap_get_folio(mapping, index, fgp_flags, + mapping_gfp_mask(mapping)); +} + +/** + * netfs_perform_write - Copy data into the pagecache. + * @iocb: The operation parameters + * @iter: The source buffer + * @netfs_group: Grouping for dirty pages (eg. ceph snaps). + * + * Copy data into pagecache pages attached to the inode specified by @iocb. + * The caller must hold appropriate inode locks. + * + * Dirty pages are tagged with a netfs_folio struct if they're not up to date + * to indicate the range modified. Dirty pages may also be tagged with a + * netfs-specific grouping such that data from an old group gets flushed before + * a new one is started. 
+ */ +ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, + struct netfs_group *netfs_group) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file_inode(file); + struct address_space *mapping = inode->i_mapping; + struct netfs_inode *ctx = netfs_inode(inode); + struct writeback_control wbc = { + .sync_mode = WB_SYNC_NONE, + .for_sync = true, + .nr_to_write = LONG_MAX, + .range_start = iocb->ki_pos, + .range_end = iocb->ki_pos + iter->count, + }; + struct netfs_io_request *wreq = NULL; + struct netfs_folio *finfo; + struct folio *folio; + enum netfs_how_to_modify howto; + enum netfs_folio_trace trace; + unsigned int bdp_flags = (iocb->ki_flags & IOCB_SYNC) ? 0: BDP_ASYNC; + ssize_t written = 0, ret; + loff_t i_size, pos = iocb->ki_pos, from, to; + size_t max_chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER; + bool maybe_trouble = false; + + if (unlikely(test_bit(NETFS_ICTX_WRITETHROUGH, &ctx->flags) || + iocb->ki_flags & (IOCB_DSYNC | IOCB_SYNC)) + ) { + if (pos < i_size_read(inode)) { + ret = filemap_write_and_wait_range(mapping, pos, pos + iter->count); + if (ret < 0) { + goto out; + } + } + + wbc_attach_fdatawrite_inode(&wbc, mapping->host); + + wreq = netfs_begin_writethrough(iocb, iter->count); + if (IS_ERR(wreq)) { + wbc_detach_inode(&wbc); + ret = PTR_ERR(wreq); + wreq = NULL; + goto out; + } + if (!is_sync_kiocb(iocb)) + wreq->iocb = iocb; + wreq->cleanup = netfs_cleanup_buffered_write; + } + + do { + size_t flen; + size_t offset; /* Offset into pagecache folio */ + size_t part; /* Bytes to write to folio */ + size_t copied; /* Bytes copied from user */ + + ret = balance_dirty_pages_ratelimited_flags(mapping, bdp_flags); + if (unlikely(ret < 0)) + break; + + offset = pos & (max_chunk - 1); + part = min(max_chunk - offset, iov_iter_count(iter)); + + /* Bring in the user pages that we will copy from _first_ lest + * we hit a nasty deadlock on copying from the same page as + * we're writing to, without it being marked uptodate. 
+ * + * Not only is this an optimisation, but it is also required to + * check that the address is actually valid, when atomic + * usercopies are used below. + * + * We rely on the page being held onto long enough by the LRU + * that we can grab it below if this causes it to be read. + */ + ret = -EFAULT; + if (unlikely(fault_in_iov_iter_readable(iter, part) == part)) + break; + + ret = -ENOMEM; + folio = netfs_grab_folio_for_write(mapping, pos, part); + if (!folio) + break; + + flen = folio_size(folio); + offset = pos & (flen - 1); + part = min_t(size_t, flen - offset, part); + + if (signal_pending(current)) { + ret = written ? -EINTR : -ERESTARTSYS; + goto error_folio_unlock; + } + + /* See if we need to prefetch the area we're going to modify. + * We need to do this before we get a lock on the folio in case + * there's more than one writer competing for the same cache + * block. + */ + howto = netfs_how_to_modify(ctx, file, folio, netfs_group, + flen, offset, part, maybe_trouble); + _debug("howto %u", howto); + switch (howto) { + case NETFS_JUST_PREFETCH: + ret = netfs_prefetch_for_write(file, folio, offset, part); + if (ret < 0) { + _debug("prefetch = %zd", ret); + goto error_folio_unlock; + } + break; + case NETFS_FOLIO_IS_UPTODATE: + case NETFS_WHOLE_FOLIO_MODIFY: + case NETFS_STREAMING_WRITE_CONT: + break; + case NETFS_MODIFY_AND_CLEAR: + zero_user_segment(&folio->page, 0, offset); + break; + case NETFS_STREAMING_WRITE: + ret = -EIO; + if (WARN_ON(folio_get_private(folio))) + goto error_folio_unlock; + break; + case NETFS_FLUSH_CONTENT: + trace_netfs_folio(folio, netfs_flush_content); + from = folio_pos(folio); + to = from + folio_size(folio) - 1; + folio_unlock(folio); + folio_put(folio); + ret = filemap_write_and_wait_range(mapping, from, to); + if (ret < 0) + goto error_folio_unlock; + continue; + } + + if (mapping_writably_mapped(mapping)) + flush_dcache_folio(folio); + + copied = copy_folio_from_iter_atomic(folio, offset, part, iter); + + 
flush_dcache_folio(folio); + + /* Deal with a (partially) failed copy */ + if (copied == 0) { + ret = -EFAULT; + goto error_folio_unlock; + } + + trace = (enum netfs_folio_trace)howto; + switch (howto) { + case NETFS_FOLIO_IS_UPTODATE: + case NETFS_JUST_PREFETCH: + netfs_set_group(folio, netfs_group); + break; + case NETFS_MODIFY_AND_CLEAR: + zero_user_segment(&folio->page, offset + copied, flen); + netfs_set_group(folio, netfs_group); + folio_mark_uptodate(folio); + break; + case NETFS_WHOLE_FOLIO_MODIFY: + if (unlikely(copied < part)) { + maybe_trouble = true; + iov_iter_revert(iter, copied); + copied = 0; + goto retry; + } + netfs_set_group(folio, netfs_group); + folio_mark_uptodate(folio); + break; + case NETFS_STREAMING_WRITE: + if (offset == 0 && copied == flen) { + netfs_set_group(folio, netfs_group); + folio_mark_uptodate(folio); + trace = netfs_streaming_filled_page; + break; + } + finfo = kzalloc(sizeof(*finfo), GFP_KERNEL); + if (!finfo) { + iov_iter_revert(iter, copied); + ret = -ENOMEM; + goto error_folio_unlock; + } + finfo->netfs_group = netfs_get_group(netfs_group); + finfo->dirty_offset = offset; + finfo->dirty_len = copied; + folio_attach_private(folio, (void *)((unsigned long)finfo | + NETFS_FOLIO_INFO)); + break; + case NETFS_STREAMING_WRITE_CONT: + finfo = netfs_folio_info(folio); + finfo->dirty_len += copied; + if (finfo->dirty_offset == 0 && finfo->dirty_len == flen) { + if (finfo->netfs_group) + folio_change_private(folio, finfo->netfs_group); + else + folio_detach_private(folio); + folio_mark_uptodate(folio); + kfree(finfo); + trace = netfs_streaming_cont_filled_page; + } + break; + default: + WARN(true, "Unexpected modify type %u ix=%lx\n", + howto, folio_index(folio)); + ret = -EIO; + goto error_folio_unlock; + } + + trace_netfs_folio(folio, trace); + + /* Update the inode size if we moved the EOF marker */ + i_size = i_size_read(inode); + pos += copied; + if (pos > i_size) { + if (ctx->ops->update_i_size) { + 
ctx->ops->update_i_size(inode, pos); + } else { + i_size_write(inode, pos); +#if IS_ENABLED(CONFIG_FSCACHE) + fscache_update_cookie(ctx->cache, NULL, &pos); +#endif + } + } + written += copied; + + if (likely(!wreq)) { + folio_mark_dirty(folio); + } else { + if (folio_test_dirty(folio)) + /* Sigh. mmap. */ + folio_clear_dirty_for_io(folio); + /* We make multiple writes to the folio... */ + if (!folio_test_writeback(folio)) { + folio_wait_fscache(folio); + folio_start_writeback(folio); + folio_start_fscache(folio); + if (wreq->iter.count == 0) + trace_netfs_folio(folio, netfs_folio_trace_wthru); + else + trace_netfs_folio(folio, netfs_folio_trace_wthru_plus); + } + netfs_advance_writethrough(wreq, copied, + offset + copied == flen); + } + retry: + folio_unlock(folio); + folio_put(folio); + folio = NULL; + + cond_resched(); + } while (iov_iter_count(iter)); + +out: + if (unlikely(wreq)) { + ret = netfs_end_writethrough(wreq, iocb); + wbc_detach_inode(&wbc); + if (ret == -EIOCBQUEUED) + return ret; + } + + iocb->ki_pos += written; + _leave(" = %zd [%zd]", written, ret); + return written ? written : ret; + +error_folio_unlock: + folio_unlock(folio); + folio_put(folio); + goto out; +} +EXPORT_SYMBOL(netfs_perform_write); + +/** + * netfs_buffered_write_iter_locked - write data to a file + * @iocb: IO state structure (file, offset, etc.) + * @from: iov_iter with data to write + * @netfs_group: Grouping for dirty pages (eg. ceph snaps). + * + * This function does all the work needed for actually writing data to a + * file. It does all basic checks, removes SUID from the file, updates + * modification times and calls proper subroutines depending on whether we + * do direct IO or a standard buffered write. + * + * The caller must hold appropriate locks around this function and have called + * generic_write_checks() already. The caller is also responsible for doing + * any necessary syncing afterwards. 
+ * + * This function does *not* take care of syncing data in case of O_SYNC write. + * A caller has to handle it. This is mainly due to the fact that we want to + * avoid syncing under i_rwsem. + * + * Return: + * * number of bytes written, even for truncated writes + * * negative error code if no data has been written at all + */ +ssize_t netfs_buffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *from, + struct netfs_group *netfs_group) +{ + struct file *file = iocb->ki_filp; + ssize_t ret; + + trace_netfs_write_iter(iocb, from); + + ret = file_remove_privs(file); + if (ret) + return ret; + + ret = file_update_time(file); + if (ret) + return ret; + + return netfs_perform_write(iocb, from, netfs_group); +} +EXPORT_SYMBOL(netfs_buffered_write_iter_locked); + +/** + * netfs_file_write_iter - write data to a file + * @iocb: IO state structure + * @from: iov_iter with data to write + * + * Perform a write to a file, writing into the pagecache if possible and doing + * an unbuffered write instead if not. 
+ * + * Return: + * * Negative error code if no data has been written at all or + * vfs_fsync_range() failed for a synchronous write + * * Number of bytes written, even for truncated writes + */ +ssize_t netfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; + struct netfs_inode *ictx = netfs_inode(inode); + ssize_t ret; + + _enter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode)); + + if ((iocb->ki_flags & IOCB_DIRECT) || + test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags)) + return netfs_unbuffered_write_iter(iocb, from); + + ret = netfs_start_io_write(inode); + if (ret < 0) + return ret; + + ret = generic_write_checks(iocb, from); + if (ret > 0) + ret = netfs_buffered_write_iter_locked(iocb, from, NULL); + netfs_end_io_write(inode); + if (ret > 0) + ret = generic_write_sync(iocb, ret); + return ret; +} +EXPORT_SYMBOL(netfs_file_write_iter); + +/* + * Notification that a previously read-only page is about to become writable. + * Note that the caller indicates a single page of a multipage folio. + */ +vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group) +{ + struct folio *folio = page_folio(vmf->page); + struct file *file = vmf->vma->vm_file; + struct inode *inode = file_inode(file); + vm_fault_t ret = VM_FAULT_RETRY; + int err; + + _enter("%lx", folio->index); + + sb_start_pagefault(inode->i_sb); + + if (folio_wait_writeback_killable(folio)) + goto out; + + if (folio_lock_killable(folio) < 0) + goto out; + + /* Can we see a streaming write here? 
*/ + if (WARN_ON(!folio_test_uptodate(folio))) { + ret = VM_FAULT_SIGBUS | VM_FAULT_LOCKED; + goto out; + } + + if (netfs_folio_group(folio) != netfs_group) { + folio_unlock(folio); + err = filemap_fdatawait_range(inode->i_mapping, + folio_pos(folio), + folio_pos(folio) + folio_size(folio)); + switch (err) { + case 0: + ret = VM_FAULT_RETRY; + goto out; + case -ENOMEM: + ret = VM_FAULT_OOM; + goto out; + default: + ret = VM_FAULT_SIGBUS; + goto out; + } + } + + if (folio_test_dirty(folio)) + trace_netfs_folio(folio, netfs_folio_trace_mkwrite_plus); + else + trace_netfs_folio(folio, netfs_folio_trace_mkwrite); + netfs_set_group(folio, netfs_group); + file_update_time(file); + ret = VM_FAULT_LOCKED; +out: + sb_end_pagefault(inode->i_sb); + return ret; +} +EXPORT_SYMBOL(netfs_page_mkwrite); + +/* + * Kill all the pages in the given range + */ +static void netfs_kill_pages(struct address_space *mapping, + loff_t start, loff_t len) +{ + struct folio *folio; + pgoff_t index = start / PAGE_SIZE; + pgoff_t last = (start + len - 1) / PAGE_SIZE, next; + + _enter("%llx-%llx", start, start + len - 1); + + do { + _debug("kill %lx (to %lx)", index, last); + + folio = filemap_get_folio(mapping, index); + if (IS_ERR(folio)) { + next = index + 1; + continue; + } + + next = folio_next_index(folio); + + trace_netfs_folio(folio, netfs_folio_trace_kill); + folio_clear_uptodate(folio); + if (folio_test_fscache(folio)) + folio_end_fscache(folio); + folio_end_writeback(folio); + folio_lock(folio); + generic_error_remove_folio(mapping, folio); + folio_unlock(folio); + folio_put(folio); + + } while (index = next, index <= last); + + _leave(""); +} + +/* + * Redirty all the pages in a given range. 
+ */ +static void netfs_redirty_pages(struct address_space *mapping, + loff_t start, loff_t len) +{ + struct folio *folio; + pgoff_t index = start / PAGE_SIZE; + pgoff_t last = (start + len - 1) / PAGE_SIZE, next; + + _enter("%llx-%llx", start, start + len - 1); + + do { + _debug("redirty %llx @%llx", len, start); + + folio = filemap_get_folio(mapping, index); + if (IS_ERR(folio)) { + next = index + 1; + continue; + } + + next = folio_next_index(folio); + trace_netfs_folio(folio, netfs_folio_trace_redirty); + filemap_dirty_folio(mapping, folio); + if (folio_test_fscache(folio)) + folio_end_fscache(folio); + folio_end_writeback(folio); + folio_put(folio); + } while (index = next, index <= last); + + balance_dirty_pages_ratelimited(mapping); + + _leave(""); +} + +/* + * Completion of write to server + */ +static void netfs_pages_written_back(struct netfs_io_request *wreq) +{ + struct address_space *mapping = wreq->mapping; + struct netfs_folio *finfo; + struct netfs_group *group = NULL; + struct folio *folio; + pgoff_t last; + int gcount = 0; + + XA_STATE(xas, &mapping->i_pages, wreq->start / PAGE_SIZE); + + _enter("%llx-%llx", wreq->start, wreq->start + wreq->len); + + rcu_read_lock(); + + last = (wreq->start + wreq->len - 1) / PAGE_SIZE; + xas_for_each(&xas, folio, last) { + WARN(!folio_test_writeback(folio), + "bad %zx @%llx page %lx %lx\n", + wreq->len, wreq->start, folio_index(folio), last); + + if ((finfo = netfs_folio_info(folio))) { + /* Streaming writes cannot be redirtied whilst under + * writeback, so discard the streaming record. + */ + folio_detach_private(folio); + group = finfo->netfs_group; + gcount++; + trace_netfs_folio(folio, netfs_folio_trace_clear_s); + kfree(finfo); + } else if ((group = netfs_folio_group(folio))) { + /* Need to detach the group pointer if the page didn't + * get redirtied. If it has been redirtied, then it + * must be within the same group. 
+ */ + if (folio_test_dirty(folio)) { + trace_netfs_folio(folio, netfs_folio_trace_redirtied); + goto end_wb; + } + if (folio_trylock(folio)) { + if (!folio_test_dirty(folio)) { + folio_detach_private(folio); + gcount++; + trace_netfs_folio(folio, netfs_folio_trace_clear_g); + } else { + trace_netfs_folio(folio, netfs_folio_trace_redirtied); + } + folio_unlock(folio); + goto end_wb; + } + + xas_pause(&xas); + rcu_read_unlock(); + folio_lock(folio); + if (!folio_test_dirty(folio)) { + folio_detach_private(folio); + gcount++; + trace_netfs_folio(folio, netfs_folio_trace_clear_g); + } else { + trace_netfs_folio(folio, netfs_folio_trace_redirtied); + } + folio_unlock(folio); + rcu_read_lock(); + } else { + trace_netfs_folio(folio, netfs_folio_trace_clear); + } + end_wb: + if (folio_test_fscache(folio)) + folio_end_fscache(folio); + xas_advance(&xas, folio_next_index(folio) - 1); + folio_end_writeback(folio); + } + + rcu_read_unlock(); + netfs_put_group_many(group, gcount); + _leave(""); +} + +/* + * Deal with the disposition of the folios that are under writeback to close + * out the operation. 
+ */ +static void netfs_cleanup_buffered_write(struct netfs_io_request *wreq) +{ + struct address_space *mapping = wreq->mapping; + + _enter(""); + + switch (wreq->error) { + case 0: + netfs_pages_written_back(wreq); + break; + + default: + pr_notice("R=%08x Unexpected error %d\n", wreq->debug_id, wreq->error); + fallthrough; + case -EACCES: + case -EPERM: + case -ENOKEY: + case -EKEYEXPIRED: + case -EKEYREJECTED: + case -EKEYREVOKED: + case -ENETRESET: + case -EDQUOT: + case -ENOSPC: + netfs_redirty_pages(mapping, wreq->start, wreq->len); + break; + + case -EROFS: + case -EIO: + case -EREMOTEIO: + case -EFBIG: + case -ENOENT: + case -ENOMEDIUM: + case -ENXIO: + netfs_kill_pages(mapping, wreq->start, wreq->len); + break; + } + + if (wreq->error) + mapping_set_error(mapping, wreq->error); + if (wreq->netfs_ops->done) + wreq->netfs_ops->done(wreq); +} + +/* + * Extend the region to be written back to include subsequent contiguously + * dirty pages if possible, but don't sleep while doing so. + * + * If this page holds new content, then we can include filler zeros in the + * writeback. + */ +static void netfs_extend_writeback(struct address_space *mapping, + struct netfs_group *group, + struct xa_state *xas, + long *_count, + loff_t start, + loff_t max_len, + bool caching, + size_t *_len, + size_t *_top) +{ + struct netfs_folio *finfo; + struct folio_batch fbatch; + struct folio *folio; + unsigned int i; + pgoff_t index = (start + *_len) / PAGE_SIZE; + size_t len; + void *priv; + bool stop = true; + + folio_batch_init(&fbatch); + + do { + /* Firstly, we gather up a batch of contiguous dirty pages + * under the RCU read lock - but we can't clear the dirty flags + * there if any of those pages are mapped. 
+ */ + rcu_read_lock(); + + xas_for_each(xas, folio, ULONG_MAX) { + stop = true; + if (xas_retry(xas, folio)) + continue; + if (xa_is_value(folio)) + break; + if (folio_index(folio) != index) { + xas_reset(xas); + break; + } + + if (!folio_try_get_rcu(folio)) { + xas_reset(xas); + continue; + } + + /* Has the folio moved or been split? */ + if (unlikely(folio != xas_reload(xas))) { + folio_put(folio); + xas_reset(xas); + break; + } + + if (!folio_trylock(folio)) { + folio_put(folio); + xas_reset(xas); + break; + } + if (!folio_test_dirty(folio) || + folio_test_writeback(folio) || + folio_test_fscache(folio)) { + folio_unlock(folio); + folio_put(folio); + xas_reset(xas); + break; + } + + stop = false; + len = folio_size(folio); + priv = folio_get_private(folio); + if ((const struct netfs_group *)priv != group) { + stop = true; + finfo = netfs_folio_info(folio); + if (finfo->netfs_group != group || + finfo->dirty_offset > 0) { + folio_unlock(folio); + folio_put(folio); + xas_reset(xas); + break; + } + len = finfo->dirty_len; + } + + *_top += folio_size(folio); + index += folio_nr_pages(folio); + *_count -= folio_nr_pages(folio); + *_len += len; + if (*_len >= max_len || *_count <= 0) + stop = true; + + if (!folio_batch_add(&fbatch, folio)) + break; + if (stop) + break; + } + + xas_pause(xas); + rcu_read_unlock(); + + /* Now, if we obtained any folios, we can shift them to being + * writable and mark them for caching. + */ + if (!folio_batch_count(&fbatch)) + break; + + for (i = 0; i < folio_batch_count(&fbatch); i++) { + folio = fbatch.folios[i]; + trace_netfs_folio(folio, netfs_folio_trace_store_plus); + + if (!folio_clear_dirty_for_io(folio)) + BUG(); + folio_start_writeback(folio); + netfs_folio_start_fscache(caching, folio); + folio_unlock(folio); + } + + folio_batch_release(&fbatch); + cond_resched(); + } while (!stop); +} + +/* + * Synchronously write back the locked page and any subsequent non-locked dirty + * pages. 
+ */ +static ssize_t netfs_write_back_from_locked_folio(struct address_space *mapping, + struct writeback_control *wbc, + struct netfs_group *group, + struct xa_state *xas, + struct folio *folio, + unsigned long long start, + unsigned long long end) +{ + struct netfs_io_request *wreq; + struct netfs_folio *finfo; + struct netfs_inode *ctx = netfs_inode(mapping->host); + unsigned long long i_size = i_size_read(&ctx->inode); + size_t len, max_len; + bool caching = netfs_is_cache_enabled(ctx); + long count = wbc->nr_to_write; + int ret; + + _enter(",%lx,%llx-%llx,%u", folio_index(folio), start, end, caching); + + wreq = netfs_alloc_request(mapping, NULL, start, folio_size(folio), + NETFS_WRITEBACK); + if (IS_ERR(wreq)) { + folio_unlock(folio); + return PTR_ERR(wreq); + } + + if (!folio_clear_dirty_for_io(folio)) + BUG(); + folio_start_writeback(folio); + netfs_folio_start_fscache(caching, folio); + + count -= folio_nr_pages(folio); + + /* Find all consecutive lockable dirty pages that have contiguous + * written regions, stopping when we find a page that is not + * immediately lockable, is not dirty or is missing, or we reach the + * end of the range. + */ + trace_netfs_folio(folio, netfs_folio_trace_store); + + len = wreq->len; + finfo = netfs_folio_info(folio); + if (finfo) { + start += finfo->dirty_offset; + if (finfo->dirty_offset + finfo->dirty_len != len) { + len = finfo->dirty_len; + goto cant_expand; + } + len = finfo->dirty_len; + } + + if (start < i_size) { + /* Trim the write to the EOF; the extra data is ignored. Also + * put an upper limit on the size of a single storedata op. 
+ */ + max_len = 65536 * 4096; + max_len = min_t(unsigned long long, max_len, end - start + 1); + max_len = min_t(unsigned long long, max_len, i_size - start); + + if (len < max_len) + netfs_extend_writeback(mapping, group, xas, &count, start, + max_len, caching, &len, &wreq->upper_len); + } + +cant_expand: + len = min_t(unsigned long long, len, i_size - start); + + /* We now have a contiguous set of dirty pages, each with writeback + * set; the first page is still locked at this point, but all the rest + * have been unlocked. + */ + folio_unlock(folio); + wreq->start = start; + wreq->len = len; + + if (start < i_size) { + _debug("write back %zx @%llx [%llx]", len, start, i_size); + + /* Speculatively write to the cache. We have to fix this up + * later if the store fails. + */ + wreq->cleanup = netfs_cleanup_buffered_write; + + iov_iter_xarray(&wreq->iter, ITER_SOURCE, &mapping->i_pages, start, + wreq->upper_len); + __set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags); + ret = netfs_begin_write(wreq, true, netfs_write_trace_writeback); + if (ret == 0 || ret == -EIOCBQUEUED) + wbc->nr_to_write -= len / PAGE_SIZE; + } else { + _debug("write discard %zx @%llx [%llx]", len, start, i_size); + + /* The dirty region was entirely beyond the EOF. */ + fscache_clear_page_bits(mapping, start, len, caching); + netfs_pages_written_back(wreq); + ret = 0; + } + + netfs_put_request(wreq, false, netfs_rreq_trace_put_return); + _leave(" = 1"); + return 1; +} + +/* + * Write a region of pages back to the server + */ +static ssize_t netfs_writepages_begin(struct address_space *mapping, + struct writeback_control *wbc, + struct netfs_group *group, + struct xa_state *xas, + unsigned long long *_start, + unsigned long long end) +{ + const struct netfs_folio *finfo; + struct folio *folio; + unsigned long long start = *_start; + ssize_t ret; + void *priv; + int skips = 0; + + _enter("%llx,%llx,", start, end); + +search_again: + /* Find the first dirty page in the group. 
*/ + rcu_read_lock(); + + for (;;) { + folio = xas_find_marked(xas, end / PAGE_SIZE, PAGECACHE_TAG_DIRTY); + if (xas_retry(xas, folio) || xa_is_value(folio)) + continue; + if (!folio) + break; + + if (!folio_try_get_rcu(folio)) { + xas_reset(xas); + continue; + } + + if (unlikely(folio != xas_reload(xas))) { + folio_put(folio); + xas_reset(xas); + continue; + } + + /* Skip any dirty folio that's not in the group of interest. */ + priv = folio_get_private(folio); + if ((const struct netfs_group *)priv != group) { + finfo = netfs_folio_info(folio); + if (finfo->netfs_group != group) { + folio_put(folio); + continue; + } + } + + xas_pause(xas); + break; + } + rcu_read_unlock(); + if (!folio) + return 0; + + start = folio_pos(folio); /* May regress with THPs */ + + _debug("wback %lx", folio_index(folio)); + + /* At this point we hold neither the i_pages lock nor the page lock: + * the page may be truncated or invalidated (changing page->mapping to + * NULL), or even swizzled back from swapper_space to tmpfs file + * mapping + */ +lock_again: + if (wbc->sync_mode != WB_SYNC_NONE) { + ret = folio_lock_killable(folio); + if (ret < 0) + return ret; + } else { + if (!folio_trylock(folio)) + goto search_again; + } + + if (folio->mapping != mapping || + !folio_test_dirty(folio)) { + start += folio_size(folio); + folio_unlock(folio); + goto search_again; + } + + if (folio_test_writeback(folio) || + folio_test_fscache(folio)) { + folio_unlock(folio); + if (wbc->sync_mode != WB_SYNC_NONE) { + folio_wait_writeback(folio); +#ifdef CONFIG_FSCACHE + folio_wait_fscache(folio); +#endif + goto lock_again; + } + + start += folio_size(folio); + if (wbc->sync_mode == WB_SYNC_NONE) { + if (skips >= 5 || need_resched()) { + ret = 0; + goto out; + } + skips++; + } + goto search_again; + } + + ret = netfs_write_back_from_locked_folio(mapping, wbc, group, xas, + folio, start, end); +out: + if (ret > 0) + *_start = start + ret; + _leave(" = %zd [%llx]", ret, *_start); + return ret; +} + +/* + * 
Write a region of pages back to the server + */ +static int netfs_writepages_region(struct address_space *mapping, + struct writeback_control *wbc, + struct netfs_group *group, + unsigned long long *_start, + unsigned long long end) +{ + ssize_t ret; + + XA_STATE(xas, &mapping->i_pages, *_start / PAGE_SIZE); + + do { + ret = netfs_writepages_begin(mapping, wbc, group, &xas, + _start, end); + if (ret > 0 && wbc->nr_to_write > 0) + cond_resched(); + } while (ret > 0 && wbc->nr_to_write > 0); + + return ret > 0 ? 0 : ret; +} + +/* + * write some of the pending data back to the server + */ +int netfs_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct netfs_group *group = NULL; + loff_t start, end; + int ret; + + _enter(""); + + /* We have to be careful as we can end up racing with setattr() + * truncating the pagecache since the caller doesn't take a lock here + * to prevent it. + */ + + if (wbc->range_cyclic && mapping->writeback_index) { + start = mapping->writeback_index * PAGE_SIZE; + ret = netfs_writepages_region(mapping, wbc, group, + &start, LLONG_MAX); + if (ret < 0) + goto out; + + if (wbc->nr_to_write <= 0) { + mapping->writeback_index = start / PAGE_SIZE; + goto out; + } + + start = 0; + end = mapping->writeback_index * PAGE_SIZE; + mapping->writeback_index = 0; + ret = netfs_writepages_region(mapping, wbc, group, &start, end); + if (ret == 0) + mapping->writeback_index = start / PAGE_SIZE; + } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) { + start = 0; + ret = netfs_writepages_region(mapping, wbc, group, + &start, LLONG_MAX); + if (wbc->nr_to_write > 0 && ret == 0) + mapping->writeback_index = start / PAGE_SIZE; + } else { + start = wbc->range_start; + ret = netfs_writepages_region(mapping, wbc, group, + &start, wbc->range_end); + } + +out: + _leave(" = %d", ret); + return ret; +} +EXPORT_SYMBOL(netfs_writepages); + +/* + * Deal with the disposition of a laundered folio. 
+ */ +static void netfs_cleanup_launder_folio(struct netfs_io_request *wreq) +{ + if (wreq->error) { + pr_notice("R=%08x Laundering error %d\n", wreq->debug_id, wreq->error); + mapping_set_error(wreq->mapping, wreq->error); + } +} + +/** + * netfs_launder_folio - Clean up a dirty folio that's being invalidated + * @folio: The folio to clean + * + * This is called to write back a folio that's being invalidated when an inode + * is getting torn down. Ideally, writepages would be used instead. + */ +int netfs_launder_folio(struct folio *folio) +{ + struct netfs_io_request *wreq; + struct address_space *mapping = folio->mapping; + struct netfs_folio *finfo = netfs_folio_info(folio); + struct netfs_group *group = netfs_folio_group(folio); + struct bio_vec bvec; + unsigned long long i_size = i_size_read(mapping->host); + unsigned long long start = folio_pos(folio); + size_t offset = 0, len; + int ret = 0; + + if (finfo) { + offset = finfo->dirty_offset; + start += offset; + len = finfo->dirty_len; + } else { + len = folio_size(folio); + } + len = min_t(unsigned long long, len, i_size - start); + + wreq = netfs_alloc_request(mapping, NULL, start, len, NETFS_LAUNDER_WRITE); + if (IS_ERR(wreq)) { + ret = PTR_ERR(wreq); + goto out; + } + + if (!folio_clear_dirty_for_io(folio)) + goto out_put; + + trace_netfs_folio(folio, netfs_folio_trace_launder); + + _debug("launder %llx-%llx", start, start + len - 1); + + /* Speculatively write to the cache. We have to fix this up later if + * the store fails. 
+ */ + wreq->cleanup = netfs_cleanup_launder_folio; + + bvec_set_folio(&bvec, folio, len, offset); + iov_iter_bvec(&wreq->iter, ITER_SOURCE, &bvec, 1, len); + __set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags); + ret = netfs_begin_write(wreq, true, netfs_write_trace_launder); + +out_put: + folio_detach_private(folio); + netfs_put_group(group); + kfree(finfo); + netfs_put_request(wreq, false, netfs_rreq_trace_put_return); +out: + folio_wait_fscache(folio); + _leave(" = %d", ret); + return ret; +} +EXPORT_SYMBOL(netfs_launder_folio); diff --git a/fs/netfs/direct_read.c b/fs/netfs/direct_read.c new file mode 100644 index 0000000000000000000000000000000000000000..ad4370b3935d6ee1678e82f05926b990f05edc1d --- /dev/null +++ b/fs/netfs/direct_read.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Direct I/O support. + * + * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "internal.h" + +/** + * netfs_unbuffered_read_iter_locked - Perform an unbuffered or direct I/O read + * @iocb: The I/O control descriptor describing the read + * @iter: The output buffer (also specifies read length) + * + * Perform an unbuffered I/O or direct I/O from the file in @iocb to the + * output buffer. No use is made of the pagecache. + * + * The caller must hold any appropriate locks. 
+ */ +static ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *iter) +{ + struct netfs_io_request *rreq; + ssize_t ret; + size_t orig_count = iov_iter_count(iter); + bool async = !is_sync_kiocb(iocb); + + _enter(""); + + if (!orig_count) + return 0; /* Don't update atime */ + + ret = kiocb_write_and_wait(iocb, orig_count); + if (ret < 0) + return ret; + file_accessed(iocb->ki_filp); + + rreq = netfs_alloc_request(iocb->ki_filp->f_mapping, iocb->ki_filp, + iocb->ki_pos, orig_count, + NETFS_DIO_READ); + if (IS_ERR(rreq)) + return PTR_ERR(rreq); + + netfs_stat(&netfs_n_rh_dio_read); + trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_dio_read); + + /* If this is an async op, we have to keep track of the destination + * buffer for ourselves as the caller's iterator will be trashed when + * we return. + * + * In such a case, extract an iterator to represent as much of the + * output buffer as we can manage. Note that the extraction might not + * be able to allocate a sufficiently large bvec array and may shorten + * the request. 
+ */ + if (user_backed_iter(iter)) { + ret = netfs_extract_user_iter(iter, rreq->len, &rreq->iter, 0); + if (ret < 0) + goto out; + rreq->direct_bv = (struct bio_vec *)rreq->iter.bvec; + rreq->direct_bv_count = ret; + rreq->direct_bv_unpin = iov_iter_extract_will_pin(iter); + rreq->len = iov_iter_count(&rreq->iter); + } else { + rreq->iter = *iter; + rreq->len = orig_count; + rreq->direct_bv_unpin = false; + iov_iter_advance(iter, orig_count); + } + + // TODO: Set up bounce buffer if needed + + if (async) + rreq->iocb = iocb; + + ret = netfs_begin_read(rreq, is_sync_kiocb(iocb)); + if (ret < 0) + goto out; /* May be -EIOCBQUEUED */ + if (!async) { + // TODO: Copy from bounce buffer + iocb->ki_pos += rreq->transferred; + ret = rreq->transferred; + } + +out: + netfs_put_request(rreq, false, netfs_rreq_trace_put_return); + if (ret > 0) + orig_count -= ret; + if (ret != -EIOCBQUEUED) + iov_iter_revert(iter, orig_count - iov_iter_count(iter)); + return ret; +} + +/** + * netfs_unbuffered_read_iter - Perform an unbuffered or direct I/O read + * @iocb: The I/O control descriptor describing the read + * @iter: The output buffer (also specifies read length) + * + * Perform an unbuffered I/O or direct I/O from the file in @iocb to the + * output buffer. No use is made of the pagecache. 
+ */ +ssize_t netfs_unbuffered_read_iter(struct kiocb *iocb, struct iov_iter *iter) +{ + struct inode *inode = file_inode(iocb->ki_filp); + ssize_t ret; + + if (!iter->count) + return 0; /* Don't update atime */ + + ret = netfs_start_io_direct(inode); + if (ret == 0) { + ret = netfs_unbuffered_read_iter_locked(iocb, iter); + netfs_end_io_direct(inode); + } + return ret; +} +EXPORT_SYMBOL(netfs_unbuffered_read_iter); diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c new file mode 100644 index 0000000000000000000000000000000000000000..60a40d293c87f5fd1088830f07488775b8725bb4 --- /dev/null +++ b/fs/netfs/direct_write.c @@ -0,0 +1,171 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Unbuffered and direct write support. + * + * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include +#include +#include "internal.h" + +static void netfs_cleanup_dio_write(struct netfs_io_request *wreq) +{ + struct inode *inode = wreq->inode; + unsigned long long end = wreq->start + wreq->len; + + if (!wreq->error && + i_size_read(inode) < end) { + if (wreq->netfs_ops->update_i_size) + wreq->netfs_ops->update_i_size(inode, end); + else + i_size_write(inode, end); + } +} + +/* + * Perform an unbuffered write where we may have to do an RMW operation on an + * encrypted file. This can also be used for direct I/O writes. + */ +static ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *iter, + struct netfs_group *netfs_group) +{ + struct netfs_io_request *wreq; + unsigned long long start = iocb->ki_pos; + unsigned long long end = start + iov_iter_count(iter); + ssize_t ret, n; + bool async = !is_sync_kiocb(iocb); + + _enter(""); + + /* We're going to need a bounce buffer if what we transmit is going to + * be different in some way to the source buffer, e.g. because it gets + * encrypted/compressed or because it needs expanding to a block size. 
+ */ + // TODO + + _debug("uw %llx-%llx", start, end); + + wreq = netfs_alloc_request(iocb->ki_filp->f_mapping, iocb->ki_filp, + start, end - start, + iocb->ki_flags & IOCB_DIRECT ? + NETFS_DIO_WRITE : NETFS_UNBUFFERED_WRITE); + if (IS_ERR(wreq)) + return PTR_ERR(wreq); + + { + /* If this is an async op and we're not using a bounce buffer, + * we have to save the source buffer as the iterator is only + * good until we return. In such a case, extract an iterator + * to represent as much of the source buffer as we can + * manage. Note that the extraction might not be able to + * allocate a sufficiently large bvec array and may shorten the + * request. + */ + if (async || user_backed_iter(iter)) { + n = netfs_extract_user_iter(iter, wreq->len, &wreq->iter, 0); + if (n < 0) { + ret = n; + goto out; + } + wreq->direct_bv = (struct bio_vec *)wreq->iter.bvec; + wreq->direct_bv_count = n; + wreq->direct_bv_unpin = iov_iter_extract_will_pin(iter); + wreq->len = iov_iter_count(&wreq->iter); + } else { + wreq->iter = *iter; + } + + wreq->io_iter = wreq->iter; + } + + /* Copy the data into the bounce buffer and encrypt it. */ + // TODO + + /* Dispatch the write. */ + __set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags); + if (async) + wreq->iocb = iocb; + wreq->cleanup = netfs_cleanup_dio_write; + ret = netfs_begin_write(wreq, is_sync_kiocb(iocb), + iocb->ki_flags & IOCB_DIRECT ? 
+ netfs_write_trace_dio_write : + netfs_write_trace_unbuffered_write); + if (ret < 0) { + _debug("begin = %zd", ret); + goto out; + } + + if (!async) { + trace_netfs_rreq(wreq, netfs_rreq_trace_wait_ip); + wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS, + TASK_UNINTERRUPTIBLE); + + ret = wreq->error; + _debug("waited = %zd", ret); + if (ret == 0) { + ret = wreq->transferred; + iocb->ki_pos += ret; + } + } else { + ret = -EIOCBQUEUED; + } + +out: + netfs_put_request(wreq, false, netfs_rreq_trace_put_return); + return ret; +} + +/** + * netfs_unbuffered_write_iter - Unbuffered write to a file + * @iocb: IO state structure + * @from: iov_iter with data to write + * + * Do an unbuffered write to a file, writing the data directly to the server + * and not lodging the data in the pagecache. + * + * Return: + * * Negative error code if no data has been written at all or + * vfs_fsync_range() failed for a synchronous write + * * Number of bytes written, even for truncated writes + */ +ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; + struct netfs_inode *ictx = netfs_inode(inode); + unsigned long long end; + ssize_t ret; + + _enter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode)); + + trace_netfs_write_iter(iocb, from); + netfs_stat(&netfs_n_rh_dio_write); + + ret = netfs_start_io_direct(inode); + if (ret < 0) + return ret; + ret = generic_write_checks(iocb, from); + if (ret < 0) + goto out; + ret = file_remove_privs(file); + if (ret < 0) + goto out; + ret = file_update_time(file); + if (ret < 0) + goto out; + ret = kiocb_invalidate_pages(iocb, iov_iter_count(from)); + if (ret < 0) + goto out; + end = iocb->ki_pos + iov_iter_count(from); + if (end > ictx->zero_point) + ictx->zero_point = end; + + fscache_invalidate(netfs_i_cookie(ictx), NULL, i_size_read(inode), + FSCACHE_INVAL_DIO_WRITE); + ret = 
netfs_unbuffered_write_iter_locked(iocb, from, NULL); +out: + netfs_end_io_direct(inode); + return ret; +} +EXPORT_SYMBOL(netfs_unbuffered_write_iter); diff --git a/fs/fscache/cache.c b/fs/netfs/fscache_cache.c similarity index 100% rename from fs/fscache/cache.c rename to fs/netfs/fscache_cache.c diff --git a/fs/fscache/cookie.c b/fs/netfs/fscache_cookie.c similarity index 100% rename from fs/fscache/cookie.c rename to fs/netfs/fscache_cookie.c diff --git a/fs/netfs/fscache_internal.h b/fs/netfs/fscache_internal.h new file mode 100644 index 0000000000000000000000000000000000000000..a09b948fcef212c761d4e7e1c68078e5e5a294ad --- /dev/null +++ b/fs/netfs/fscache_internal.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Internal definitions for FS-Cache + * + * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include "internal.h" + +#ifdef pr_fmt +#undef pr_fmt +#endif + +#define pr_fmt(fmt) "FS-Cache: " fmt diff --git a/fs/fscache/io.c b/fs/netfs/fscache_io.c similarity index 86% rename from fs/fscache/io.c rename to fs/netfs/fscache_io.c index 0d2b8dec8f82cd040391b01f814e175cc39eeae9..ad572f7ee897b9d26d2439a6a1178332d2a2e547 100644 --- a/fs/fscache/io.c +++ b/fs/netfs/fscache_io.c @@ -158,46 +158,6 @@ int __fscache_begin_write_operation(struct netfs_cache_resources *cres, } EXPORT_SYMBOL(__fscache_begin_write_operation); -/** - * fscache_dirty_folio - Mark folio dirty and pin a cache object for writeback - * @mapping: The mapping the folio belongs to. - * @folio: The folio being dirtied. - * @cookie: The cookie referring to the cache object - * - * Set the dirty flag on a folio and pin an in-use cache object in memory - * so that writeback can later write to it. This is intended - * to be called from the filesystem's ->dirty_folio() method. - * - * Return: true if the dirty flag was set on the folio, false otherwise. 
- */ -bool fscache_dirty_folio(struct address_space *mapping, struct folio *folio, - struct fscache_cookie *cookie) -{ - struct inode *inode = mapping->host; - bool need_use = false; - - _enter(""); - - if (!filemap_dirty_folio(mapping, folio)) - return false; - if (!fscache_cookie_valid(cookie)) - return true; - - if (!(inode->i_state & I_PINNING_FSCACHE_WB)) { - spin_lock(&inode->i_lock); - if (!(inode->i_state & I_PINNING_FSCACHE_WB)) { - inode->i_state |= I_PINNING_FSCACHE_WB; - need_use = true; - } - spin_unlock(&inode->i_lock); - - if (need_use) - fscache_use_cookie(cookie, true); - } - return true; -} -EXPORT_SYMBOL(fscache_dirty_folio); - struct fscache_write_request { struct netfs_cache_resources cache_resources; struct address_space *mapping; @@ -277,7 +237,7 @@ void __fscache_write_to_cache(struct fscache_cookie *cookie, fscache_access_io_write) < 0) goto abandon_free; - ret = cres->ops->prepare_write(cres, &start, &len, i_size, false); + ret = cres->ops->prepare_write(cres, &start, &len, len, i_size, false); if (ret < 0) goto abandon_end; diff --git a/fs/fscache/main.c b/fs/netfs/fscache_main.c similarity index 84% rename from fs/fscache/main.c rename to fs/netfs/fscache_main.c index dad85fd84f6f9f9245112b7bdcea4305313c8950..42e98bb523e369f8251146bb7a3b9802c4874b3d 100644 --- a/fs/fscache/main.c +++ b/fs/netfs/fscache_main.c @@ -8,18 +8,9 @@ #define FSCACHE_DEBUG_LEVEL CACHE #include #include -#define CREATE_TRACE_POINTS #include "internal.h" - -MODULE_DESCRIPTION("FS Cache Manager"); -MODULE_AUTHOR("Red Hat, Inc."); -MODULE_LICENSE("GPL"); - -unsigned fscache_debug; -module_param_named(debug, fscache_debug, uint, - S_IWUSR | S_IRUGO); -MODULE_PARM_DESC(fscache_debug, - "FS-Cache debugging mask"); +#define CREATE_TRACE_POINTS +#include EXPORT_TRACEPOINT_SYMBOL(fscache_access_cache); EXPORT_TRACEPOINT_SYMBOL(fscache_access_volume); @@ -71,7 +62,7 @@ unsigned int fscache_hash(unsigned int salt, const void *data, size_t len) /* * initialise the fs caching 
module */ -static int __init fscache_init(void) +int __init fscache_init(void) { int ret = -ENOMEM; @@ -92,7 +83,7 @@ static int __init fscache_init(void) goto error_cookie_jar; } - pr_notice("Loaded\n"); + pr_notice("FS-Cache loaded\n"); return 0; error_cookie_jar: @@ -103,19 +94,15 @@ error_wq: return ret; } -fs_initcall(fscache_init); - /* * clean up on module removal */ -static void __exit fscache_exit(void) +void __exit fscache_exit(void) { _enter(""); kmem_cache_destroy(fscache_cookie_jar); fscache_proc_cleanup(); destroy_workqueue(fscache_wq); - pr_notice("Unloaded\n"); + pr_notice("FS-Cache unloaded\n"); } - -module_exit(fscache_exit); diff --git a/fs/fscache/proc.c b/fs/netfs/fscache_proc.c similarity index 58% rename from fs/fscache/proc.c rename to fs/netfs/fscache_proc.c index dc3b0e9c8cce848a4777a5cfbdcf621b4a3688b7..874d951bc39012d487b87e27b641c3591cf51909 100644 --- a/fs/fscache/proc.c +++ b/fs/netfs/fscache_proc.c @@ -12,41 +12,34 @@ #include "internal.h" /* - * initialise the /proc/fs/fscache/ directory + * Add files to /proc/fs/netfs/. 
*/ int __init fscache_proc_init(void) { - if (!proc_mkdir("fs/fscache", NULL)) - goto error_dir; + if (!proc_symlink("fs/fscache", NULL, "netfs")) + goto error_sym; - if (!proc_create_seq("fs/fscache/caches", S_IFREG | 0444, NULL, + if (!proc_create_seq("fs/netfs/caches", S_IFREG | 0444, NULL, &fscache_caches_seq_ops)) goto error; - if (!proc_create_seq("fs/fscache/volumes", S_IFREG | 0444, NULL, + if (!proc_create_seq("fs/netfs/volumes", S_IFREG | 0444, NULL, &fscache_volumes_seq_ops)) goto error; - if (!proc_create_seq("fs/fscache/cookies", S_IFREG | 0444, NULL, + if (!proc_create_seq("fs/netfs/cookies", S_IFREG | 0444, NULL, &fscache_cookies_seq_ops)) goto error; - -#ifdef CONFIG_FSCACHE_STATS - if (!proc_create_single("fs/fscache/stats", S_IFREG | 0444, NULL, - fscache_stats_show)) - goto error; -#endif - return 0; error: remove_proc_entry("fs/fscache", NULL); -error_dir: +error_sym: return -ENOMEM; } /* - * clean up the /proc/fs/fscache/ directory + * Clean up the /proc/fs/fscache symlink. 
*/ void fscache_proc_cleanup(void) { diff --git a/fs/fscache/stats.c b/fs/netfs/fscache_stats.c similarity index 90% rename from fs/fscache/stats.c rename to fs/netfs/fscache_stats.c index fc94e5e79f1c6d456bd6e48ac2fe8a5141f70ef6..add21abdf7134983c30a497644c20b14a7a9a8ac 100644 --- a/fs/fscache/stats.c +++ b/fs/netfs/fscache_stats.c @@ -48,13 +48,15 @@ atomic_t fscache_n_no_create_space; EXPORT_SYMBOL(fscache_n_no_create_space); atomic_t fscache_n_culled; EXPORT_SYMBOL(fscache_n_culled); +atomic_t fscache_n_dio_misfit; +EXPORT_SYMBOL(fscache_n_dio_misfit); /* * display the general statistics */ -int fscache_stats_show(struct seq_file *m, void *v) +int fscache_stats_show(struct seq_file *m) { - seq_puts(m, "FS-Cache statistics\n"); + seq_puts(m, "-- FS-Cache statistics --\n"); seq_printf(m, "Cookies: n=%d v=%d vcol=%u voom=%u\n", atomic_read(&fscache_n_cookies), atomic_read(&fscache_n_volumes), @@ -93,10 +95,9 @@ int fscache_stats_show(struct seq_file *m, void *v) atomic_read(&fscache_n_no_create_space), atomic_read(&fscache_n_culled)); - seq_printf(m, "IO : rd=%u wr=%u\n", + seq_printf(m, "IO : rd=%u wr=%u mis=%u\n", atomic_read(&fscache_n_read), - atomic_read(&fscache_n_write)); - - netfs_stats_show(m); + atomic_read(&fscache_n_write), + atomic_read(&fscache_n_dio_misfit)); return 0; } diff --git a/fs/fscache/volume.c b/fs/netfs/fscache_volume.c similarity index 100% rename from fs/fscache/volume.c rename to fs/netfs/fscache_volume.c diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h index 43fac1b14e40cd1351cbac875d1886b0a9256835..ec7045d24400df09bd5a933401a7fbb2d36a3d24 100644 --- a/fs/netfs/internal.h +++ b/fs/netfs/internal.h @@ -5,9 +5,13 @@ * Written by David Howells (dhowells@redhat.com) */ +#include +#include #include #include +#include #include +#include #ifdef pr_fmt #undef pr_fmt @@ -19,6 +23,8 @@ * buffered_read.c */ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq); +int netfs_prefetch_for_write(struct file *file, struct folio *folio, + 
size_t offset, size_t len); /* * io.c @@ -29,6 +35,41 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync); * main.c */ extern unsigned int netfs_debug; +extern struct list_head netfs_io_requests; +extern spinlock_t netfs_proc_lock; + +#ifdef CONFIG_PROC_FS +static inline void netfs_proc_add_rreq(struct netfs_io_request *rreq) +{ + spin_lock(&netfs_proc_lock); + list_add_tail_rcu(&rreq->proc_link, &netfs_io_requests); + spin_unlock(&netfs_proc_lock); +} +static inline void netfs_proc_del_rreq(struct netfs_io_request *rreq) +{ + if (!list_empty(&rreq->proc_link)) { + spin_lock(&netfs_proc_lock); + list_del_rcu(&rreq->proc_link); + spin_unlock(&netfs_proc_lock); + } +} +#else +static inline void netfs_proc_add_rreq(struct netfs_io_request *rreq) {} +static inline void netfs_proc_del_rreq(struct netfs_io_request *rreq) {} +#endif + +/* + * misc.c + */ +#define NETFS_FLAG_PUT_MARK BIT(0) +#define NETFS_FLAG_PAGECACHE_MARK BIT(1) +int netfs_xa_store_and_mark(struct xarray *xa, unsigned long index, + struct folio *folio, unsigned int flags, + gfp_t gfp_mask); +int netfs_add_folios_to_buffer(struct xarray *buffer, + struct address_space *mapping, + pgoff_t index, pgoff_t to, gfp_t gfp_mask); +void netfs_clear_buffer(struct xarray *buffer); /* * objects.c @@ -49,10 +90,21 @@ static inline void netfs_see_request(struct netfs_io_request *rreq, trace_netfs_rreq_ref(rreq->debug_id, refcount_read(&rreq->ref), what); } +/* + * output.c + */ +int netfs_begin_write(struct netfs_io_request *wreq, bool may_wait, + enum netfs_write_trace what); +struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len); +int netfs_advance_writethrough(struct netfs_io_request *wreq, size_t copied, bool to_page_end); +int netfs_end_writethrough(struct netfs_io_request *wreq, struct kiocb *iocb); + /* * stats.c */ #ifdef CONFIG_NETFS_STATS +extern atomic_t netfs_n_rh_dio_read; +extern atomic_t netfs_n_rh_dio_write; extern atomic_t netfs_n_rh_readahead; extern 
atomic_t netfs_n_rh_readpage; extern atomic_t netfs_n_rh_rreq; @@ -71,7 +123,15 @@ extern atomic_t netfs_n_rh_write_begin; extern atomic_t netfs_n_rh_write_done; extern atomic_t netfs_n_rh_write_failed; extern atomic_t netfs_n_rh_write_zskip; +extern atomic_t netfs_n_wh_wstream_conflict; +extern atomic_t netfs_n_wh_upload; +extern atomic_t netfs_n_wh_upload_done; +extern atomic_t netfs_n_wh_upload_failed; +extern atomic_t netfs_n_wh_write; +extern atomic_t netfs_n_wh_write_done; +extern atomic_t netfs_n_wh_write_failed; +int netfs_stats_show(struct seq_file *m, void *v); static inline void netfs_stat(atomic_t *stat) { @@ -103,6 +163,176 @@ static inline bool netfs_is_cache_enabled(struct netfs_inode *ctx) #endif } +/* + * Get a ref on a netfs group attached to a dirty page (e.g. a ceph snap). + */ +static inline struct netfs_group *netfs_get_group(struct netfs_group *netfs_group) +{ + if (netfs_group) + refcount_inc(&netfs_group->ref); + return netfs_group; +} + +/* + * Dispose of a netfs group attached to a dirty page (e.g. a ceph snap). + */ +static inline void netfs_put_group(struct netfs_group *netfs_group) +{ + if (netfs_group && refcount_dec_and_test(&netfs_group->ref)) + netfs_group->free(netfs_group); +} + +/* + * Dispose of a netfs group attached to a dirty page (e.g. a ceph snap). 
+ */ +static inline void netfs_put_group_many(struct netfs_group *netfs_group, int nr) +{ + if (netfs_group && refcount_sub_and_test(nr, &netfs_group->ref)) + netfs_group->free(netfs_group); +} + +/* + * fscache-cache.c + */ +#ifdef CONFIG_PROC_FS +extern const struct seq_operations fscache_caches_seq_ops; +#endif +bool fscache_begin_cache_access(struct fscache_cache *cache, enum fscache_access_trace why); +void fscache_end_cache_access(struct fscache_cache *cache, enum fscache_access_trace why); +struct fscache_cache *fscache_lookup_cache(const char *name, bool is_cache); +void fscache_put_cache(struct fscache_cache *cache, enum fscache_cache_trace where); + +static inline enum fscache_cache_state fscache_cache_state(const struct fscache_cache *cache) +{ + return smp_load_acquire(&cache->state); +} + +static inline bool fscache_cache_is_live(const struct fscache_cache *cache) +{ + return fscache_cache_state(cache) == FSCACHE_CACHE_IS_ACTIVE; +} + +static inline void fscache_set_cache_state(struct fscache_cache *cache, + enum fscache_cache_state new_state) +{ + smp_store_release(&cache->state, new_state); + +} + +static inline bool fscache_set_cache_state_maybe(struct fscache_cache *cache, + enum fscache_cache_state old_state, + enum fscache_cache_state new_state) +{ + return try_cmpxchg_release(&cache->state, &old_state, new_state); +} + +/* + * fscache-cookie.c + */ +extern struct kmem_cache *fscache_cookie_jar; +#ifdef CONFIG_PROC_FS +extern const struct seq_operations fscache_cookies_seq_ops; +#endif +extern struct timer_list fscache_cookie_lru_timer; + +extern void fscache_print_cookie(struct fscache_cookie *cookie, char prefix); +extern bool fscache_begin_cookie_access(struct fscache_cookie *cookie, + enum fscache_access_trace why); + +static inline void fscache_see_cookie(struct fscache_cookie *cookie, + enum fscache_cookie_trace where) +{ + trace_fscache_cookie(cookie->debug_id, refcount_read(&cookie->ref), + where); +} + +/* + * fscache-main.c + */ +extern 
unsigned int fscache_hash(unsigned int salt, const void *data, size_t len); +#ifdef CONFIG_FSCACHE +int __init fscache_init(void); +void __exit fscache_exit(void); +#else +static inline int fscache_init(void) { return 0; } +static inline void fscache_exit(void) {} +#endif + +/* + * fscache-proc.c + */ +#ifdef CONFIG_PROC_FS +extern int __init fscache_proc_init(void); +extern void fscache_proc_cleanup(void); +#else +#define fscache_proc_init() (0) +#define fscache_proc_cleanup() do {} while (0) +#endif + +/* + * fscache-stats.c + */ +#ifdef CONFIG_FSCACHE_STATS +extern atomic_t fscache_n_volumes; +extern atomic_t fscache_n_volumes_collision; +extern atomic_t fscache_n_volumes_nomem; +extern atomic_t fscache_n_cookies; +extern atomic_t fscache_n_cookies_lru; +extern atomic_t fscache_n_cookies_lru_expired; +extern atomic_t fscache_n_cookies_lru_removed; +extern atomic_t fscache_n_cookies_lru_dropped; + +extern atomic_t fscache_n_acquires; +extern atomic_t fscache_n_acquires_ok; +extern atomic_t fscache_n_acquires_oom; + +extern atomic_t fscache_n_invalidates; + +extern atomic_t fscache_n_relinquishes; +extern atomic_t fscache_n_relinquishes_retire; +extern atomic_t fscache_n_relinquishes_dropped; + +extern atomic_t fscache_n_resizes; +extern atomic_t fscache_n_resizes_null; + +static inline void fscache_stat(atomic_t *stat) +{ + atomic_inc(stat); +} + +static inline void fscache_stat_d(atomic_t *stat) +{ + atomic_dec(stat); +} + +#define __fscache_stat(stat) (stat) + +int fscache_stats_show(struct seq_file *m); +#else + +#define __fscache_stat(stat) (NULL) +#define fscache_stat(stat) do {} while (0) +#define fscache_stat_d(stat) do {} while (0) + +static inline int fscache_stats_show(struct seq_file *m) { return 0; } +#endif + +/* + * fscache-volume.c + */ +#ifdef CONFIG_PROC_FS +extern const struct seq_operations fscache_volumes_seq_ops; +#endif + +struct fscache_volume *fscache_get_volume(struct fscache_volume *volume, + enum fscache_volume_trace where); +void 
fscache_put_volume(struct fscache_volume *volume, + enum fscache_volume_trace where); +bool fscache_begin_volume_access(struct fscache_volume *volume, + struct fscache_cookie *cookie, + enum fscache_access_trace why); +void fscache_create_volume(struct fscache_volume *volume, bool wait); + /*****************************************************************************/ /* * debug tracing @@ -143,3 +373,57 @@ do { \ #define _leave(FMT, ...) no_printk("<== %s()"FMT"", __func__, ##__VA_ARGS__) #define _debug(FMT, ...) no_printk(FMT, ##__VA_ARGS__) #endif + +/* + * assertions + */ +#if 1 /* defined(__KDEBUGALL) */ + +#define ASSERT(X) \ +do { \ + if (unlikely(!(X))) { \ + pr_err("\n"); \ + pr_err("Assertion failed\n"); \ + BUG(); \ + } \ +} while (0) + +#define ASSERTCMP(X, OP, Y) \ +do { \ + if (unlikely(!((X) OP (Y)))) { \ + pr_err("\n"); \ + pr_err("Assertion failed\n"); \ + pr_err("%lx " #OP " %lx is false\n", \ + (unsigned long)(X), (unsigned long)(Y)); \ + BUG(); \ + } \ +} while (0) + +#define ASSERTIF(C, X) \ +do { \ + if (unlikely((C) && !(X))) { \ + pr_err("\n"); \ + pr_err("Assertion failed\n"); \ + BUG(); \ + } \ +} while (0) + +#define ASSERTIFCMP(C, X, OP, Y) \ +do { \ + if (unlikely((C) && !((X) OP (Y)))) { \ + pr_err("\n"); \ + pr_err("Assertion failed\n"); \ + pr_err("%lx " #OP " %lx is false\n", \ + (unsigned long)(X), (unsigned long)(Y)); \ + BUG(); \ + } \ +} while (0) + +#else + +#define ASSERT(X) do {} while (0) +#define ASSERTCMP(X, OP, Y) do {} while (0) +#define ASSERTIF(C, X) do {} while (0) +#define ASSERTIFCMP(C, X, OP, Y) do {} while (0) + +#endif /* assert or not */ diff --git a/fs/netfs/io.c b/fs/netfs/io.c index 7f753380e047ab5102f9908bc2f4a5f99bbdda6e..4309edf338627eee2963e1520ab6485a483e1c5d 100644 --- a/fs/netfs/io.c +++ b/fs/netfs/io.c @@ -21,12 +21,7 @@ */ static void netfs_clear_unread(struct netfs_io_subrequest *subreq) { - struct iov_iter iter; - - iov_iter_xarray(&iter, ITER_DEST, &subreq->rreq->mapping->i_pages, - subreq->start 
+ subreq->transferred, - subreq->len - subreq->transferred); - iov_iter_zero(iov_iter_count(&iter), &iter); + iov_iter_zero(iov_iter_count(&subreq->io_iter), &subreq->io_iter); } static void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error, @@ -46,14 +41,9 @@ static void netfs_read_from_cache(struct netfs_io_request *rreq, enum netfs_read_from_hole read_hole) { struct netfs_cache_resources *cres = &rreq->cache_resources; - struct iov_iter iter; netfs_stat(&netfs_n_rh_read); - iov_iter_xarray(&iter, ITER_DEST, &rreq->mapping->i_pages, - subreq->start + subreq->transferred, - subreq->len - subreq->transferred); - - cres->ops->read(cres, subreq->start, &iter, read_hole, + cres->ops->read(cres, subreq->start, &subreq->io_iter, read_hole, netfs_cache_read_terminated, subreq); } @@ -88,6 +78,13 @@ static void netfs_read_from_server(struct netfs_io_request *rreq, struct netfs_io_subrequest *subreq) { netfs_stat(&netfs_n_rh_download); + + if (rreq->origin != NETFS_DIO_READ && + iov_iter_count(&subreq->io_iter) != subreq->len - subreq->transferred) + pr_warn("R=%08x[%u] ITER PRE-MISMATCH %zx != %zx-%zx %lx\n", + rreq->debug_id, subreq->debug_index, + iov_iter_count(&subreq->io_iter), subreq->len, + subreq->transferred, subreq->flags); rreq->netfs_ops->issue_read(subreq); } @@ -129,7 +126,8 @@ static void netfs_rreq_unmark_after_write(struct netfs_io_request *rreq, */ if (have_unlocked && folio_index(folio) <= unlocked) continue; - unlocked = folio_index(folio); + unlocked = folio_next_index(folio) - 1; + trace_netfs_folio(folio, netfs_folio_trace_end_copy); folio_end_fscache(folio); have_unlocked = true; } @@ -201,7 +199,7 @@ static void netfs_rreq_do_write_to_cache(struct netfs_io_request *rreq) } ret = cres->ops->prepare_write(cres, &subreq->start, &subreq->len, - rreq->i_size, true); + subreq->len, rreq->i_size, true); if (ret < 0) { trace_netfs_failure(rreq, subreq, ret, netfs_fail_prepare_write); trace_netfs_sreq(subreq, 
netfs_sreq_trace_write_skip); @@ -259,6 +257,30 @@ static void netfs_rreq_short_read(struct netfs_io_request *rreq, netfs_read_from_server(rreq, subreq); } +/* + * Reset the subrequest iterator prior to resubmission. + */ +static void netfs_reset_subreq_iter(struct netfs_io_request *rreq, + struct netfs_io_subrequest *subreq) +{ + size_t remaining = subreq->len - subreq->transferred; + size_t count = iov_iter_count(&subreq->io_iter); + + if (count == remaining) + return; + + _debug("R=%08x[%u] ITER RESUB-MISMATCH %zx != %zx-%zx-%llx %x\n", + rreq->debug_id, subreq->debug_index, + iov_iter_count(&subreq->io_iter), subreq->transferred, + subreq->len, rreq->i_size, + subreq->io_iter.iter_type); + + if (count < remaining) + iov_iter_revert(&subreq->io_iter, remaining - count); + else + iov_iter_advance(&subreq->io_iter, count - remaining); +} + /* * Resubmit any short or failed operations. Returns true if we got the rreq * ref back. @@ -287,6 +309,7 @@ static bool netfs_rreq_perform_resubmissions(struct netfs_io_request *rreq) trace_netfs_sreq(subreq, netfs_sreq_trace_download_instead); netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit); atomic_inc(&rreq->nr_outstanding); + netfs_reset_subreq_iter(rreq, subreq); netfs_read_from_server(rreq, subreq); } else if (test_bit(NETFS_SREQ_SHORT_IO, &subreq->flags)) { netfs_rreq_short_read(rreq, subreq); @@ -320,6 +343,43 @@ static void netfs_rreq_is_still_valid(struct netfs_io_request *rreq) } } +/* + * Determine how much we can admit to having read from a DIO read. 
+ */ +static void netfs_rreq_assess_dio(struct netfs_io_request *rreq) +{ + struct netfs_io_subrequest *subreq; + unsigned int i; + size_t transferred = 0; + + for (i = 0; i < rreq->direct_bv_count; i++) + flush_dcache_page(rreq->direct_bv[i].bv_page); + + list_for_each_entry(subreq, &rreq->subrequests, rreq_link) { + if (subreq->error || subreq->transferred == 0) + break; + transferred += subreq->transferred; + if (subreq->transferred < subreq->len) + break; + } + + for (i = 0; i < rreq->direct_bv_count; i++) + flush_dcache_page(rreq->direct_bv[i].bv_page); + + rreq->transferred = transferred; + task_io_account_read(transferred); + + if (rreq->iocb) { + rreq->iocb->ki_pos += transferred; + if (rreq->iocb->ki_complete) + rreq->iocb->ki_complete( + rreq->iocb, rreq->error ? rreq->error : transferred); + } + if (rreq->netfs_ops->done) + rreq->netfs_ops->done(rreq); + inode_dio_end(rreq->inode); +} + /* * Assess the state of a read request and decide what to do next. * @@ -340,8 +400,12 @@ again: return; } - netfs_rreq_unlock_folios(rreq); + if (rreq->origin != NETFS_DIO_READ) + netfs_rreq_unlock_folios(rreq); + else + netfs_rreq_assess_dio(rreq); + trace_netfs_rreq(rreq, netfs_rreq_trace_wake_ip); clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &rreq->flags); wake_up_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS); @@ -399,9 +463,9 @@ void netfs_subreq_terminated(struct netfs_io_subrequest *subreq, struct netfs_io_request *rreq = subreq->rreq; int u; - _enter("[%u]{%llx,%lx},%zd", - subreq->debug_index, subreq->start, subreq->flags, - transferred_or_error); + _enter("R=%x[%x]{%llx,%lx},%zd", + rreq->debug_id, subreq->debug_index, + subreq->start, subreq->flags, transferred_or_error); switch (subreq->source) { case NETFS_READ_FROM_CACHE: @@ -501,15 +565,20 @@ static enum netfs_io_source netfs_cache_prepare_read(struct netfs_io_subrequest */ static enum netfs_io_source netfs_rreq_prepare_read(struct netfs_io_request *rreq, - struct netfs_io_subrequest *subreq) + struct 
netfs_io_subrequest *subreq, + struct iov_iter *io_iter) { - enum netfs_io_source source; + enum netfs_io_source source = NETFS_DOWNLOAD_FROM_SERVER; + struct netfs_inode *ictx = netfs_inode(rreq->inode); + size_t lsize; _enter("%llx-%llx,%llx", subreq->start, subreq->start + subreq->len, rreq->i_size); - source = netfs_cache_prepare_read(subreq, rreq->i_size); - if (source == NETFS_INVALID_READ) - goto out; + if (rreq->origin != NETFS_DIO_READ) { + source = netfs_cache_prepare_read(subreq, rreq->i_size); + if (source == NETFS_INVALID_READ) + goto out; + } if (source == NETFS_DOWNLOAD_FROM_SERVER) { /* Call out to the netfs to let it shrink the request to fit @@ -518,19 +587,52 @@ netfs_rreq_prepare_read(struct netfs_io_request *rreq, * to make serial calls, it can indicate a short read and then * we will call it again. */ + if (rreq->origin != NETFS_DIO_READ) { + if (subreq->start >= ictx->zero_point) { + source = NETFS_FILL_WITH_ZEROES; + goto set; + } + if (subreq->len > ictx->zero_point - subreq->start) + subreq->len = ictx->zero_point - subreq->start; + } if (subreq->len > rreq->i_size - subreq->start) subreq->len = rreq->i_size - subreq->start; + if (rreq->rsize && subreq->len > rreq->rsize) + subreq->len = rreq->rsize; if (rreq->netfs_ops->clamp_length && !rreq->netfs_ops->clamp_length(subreq)) { source = NETFS_INVALID_READ; goto out; } + + if (subreq->max_nr_segs) { + lsize = netfs_limit_iter(io_iter, 0, subreq->len, + subreq->max_nr_segs); + if (subreq->len > lsize) { + subreq->len = lsize; + trace_netfs_sreq(subreq, netfs_sreq_trace_limited); + } + } } - if (WARN_ON(subreq->len == 0)) +set: + if (subreq->len > rreq->len) + pr_warn("R=%08x[%u] SREQ>RREQ %zx > %zx\n", + rreq->debug_id, subreq->debug_index, + subreq->len, rreq->len); + + if (WARN_ON(subreq->len == 0)) { source = NETFS_INVALID_READ; + goto out; + } + subreq->source = source; + trace_netfs_sreq(subreq, netfs_sreq_trace_prepare); + + subreq->io_iter = *io_iter; + 
iov_iter_truncate(&subreq->io_iter, subreq->len); + iov_iter_advance(io_iter, subreq->len); out: subreq->source = source; trace_netfs_sreq(subreq, netfs_sreq_trace_prepare); @@ -541,6 +643,7 @@ out: * Slice off a piece of a read request and submit an I/O request for it. */ static bool netfs_rreq_submit_slice(struct netfs_io_request *rreq, + struct iov_iter *io_iter, unsigned int *_debug_index) { struct netfs_io_subrequest *subreq; @@ -552,7 +655,7 @@ static bool netfs_rreq_submit_slice(struct netfs_io_request *rreq, subreq->debug_index = (*_debug_index)++; subreq->start = rreq->start + rreq->submitted; - subreq->len = rreq->len - rreq->submitted; + subreq->len = io_iter->count; _debug("slice %llx,%zx,%zx", subreq->start, subreq->len, rreq->submitted); list_add_tail(&subreq->rreq_link, &rreq->subrequests); @@ -565,7 +668,7 @@ static bool netfs_rreq_submit_slice(struct netfs_io_request *rreq, * (the starts must coincide), in which case, we go around the loop * again and ask it to download the next piece. */ - source = netfs_rreq_prepare_read(rreq, subreq); + source = netfs_rreq_prepare_read(rreq, subreq, io_iter); if (source == NETFS_INVALID_READ) goto subreq_failed; @@ -603,6 +706,7 @@ subreq_failed: */ int netfs_begin_read(struct netfs_io_request *rreq, bool sync) { + struct iov_iter io_iter; unsigned int debug_index = 0; int ret; @@ -611,50 +715,71 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync) if (rreq->len == 0) { pr_err("Zero-sized read [R=%x]\n", rreq->debug_id); - netfs_put_request(rreq, false, netfs_rreq_trace_put_zero_len); return -EIO; } - INIT_WORK(&rreq->work, netfs_rreq_work); + if (rreq->origin == NETFS_DIO_READ) + inode_dio_begin(rreq->inode); - if (sync) - netfs_get_request(rreq, netfs_rreq_trace_get_hold); + // TODO: Use bounce buffer if requested + rreq->io_iter = rreq->iter; + + INIT_WORK(&rreq->work, netfs_rreq_work); /* Chop the read into slices according to what the cache and the netfs * want and submit each one. 
*/ + netfs_get_request(rreq, netfs_rreq_trace_get_for_outstanding); atomic_set(&rreq->nr_outstanding, 1); + io_iter = rreq->io_iter; do { - if (!netfs_rreq_submit_slice(rreq, &debug_index)) + _debug("submit %llx + %zx >= %llx", + rreq->start, rreq->submitted, rreq->i_size); + if (rreq->origin == NETFS_DIO_READ && + rreq->start + rreq->submitted >= rreq->i_size) + break; + if (!netfs_rreq_submit_slice(rreq, &io_iter, &debug_index)) + break; + if (test_bit(NETFS_RREQ_BLOCKED, &rreq->flags) && + test_bit(NETFS_RREQ_NONBLOCK, &rreq->flags)) break; } while (rreq->submitted < rreq->len); + if (!rreq->submitted) { + netfs_put_request(rreq, false, netfs_rreq_trace_put_no_submit); + ret = 0; + goto out; + } + if (sync) { - /* Keep nr_outstanding incremented so that the ref always belongs to - * us, and the service code isn't punted off to a random thread pool to - * process. + /* Keep nr_outstanding incremented so that the ref always + * belongs to us, and the service code isn't punted off to a + * random thread pool to process. Note that this might start + * further work, such as writing to the cache. */ - for (;;) { - wait_var_event(&rreq->nr_outstanding, - atomic_read(&rreq->nr_outstanding) == 1); + wait_var_event(&rreq->nr_outstanding, + atomic_read(&rreq->nr_outstanding) == 1); + if (atomic_dec_and_test(&rreq->nr_outstanding)) netfs_rreq_assess(rreq, false); - if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) - break; - cond_resched(); - } + + trace_netfs_rreq(rreq, netfs_rreq_trace_wait_ip); + wait_on_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS, + TASK_UNINTERRUPTIBLE); ret = rreq->error; - if (ret == 0 && rreq->submitted < rreq->len) { + if (ret == 0 && rreq->submitted < rreq->len && + rreq->origin != NETFS_DIO_READ) { trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read); ret = -EIO; } - netfs_put_request(rreq, false, netfs_rreq_trace_put_hold); } else { /* If we decrement nr_outstanding to 0, the ref belongs to us. 
*/ if (atomic_dec_and_test(&rreq->nr_outstanding)) netfs_rreq_assess(rreq, false); - ret = 0; + ret = -EIOCBQUEUED; } + +out: return ret; } diff --git a/fs/netfs/iterator.c b/fs/netfs/iterator.c index 2ff07ba655a072b3c0e31c6bf473a7a80ea0c24f..b781bbbf1d8d643727e4710358e4211face70bd1 100644 --- a/fs/netfs/iterator.c +++ b/fs/netfs/iterator.c @@ -101,3 +101,100 @@ ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len, return npages; } EXPORT_SYMBOL_GPL(netfs_extract_user_iter); + +/* + * Select the span of a bvec iterator we're going to use. Limit it by both maximum + * size and maximum number of segments. Returns the size of the span in bytes. + */ +static size_t netfs_limit_bvec(const struct iov_iter *iter, size_t start_offset, + size_t max_size, size_t max_segs) +{ + const struct bio_vec *bvecs = iter->bvec; + unsigned int nbv = iter->nr_segs, ix = 0, nsegs = 0; + size_t len, span = 0, n = iter->count; + size_t skip = iter->iov_offset + start_offset; + + if (WARN_ON(!iov_iter_is_bvec(iter)) || + WARN_ON(start_offset > n) || + n == 0) + return 0; + + while (n && ix < nbv && skip) { + len = bvecs[ix].bv_len; + if (skip < len) + break; + skip -= len; + n -= len; + ix++; + } + + while (n && ix < nbv) { + len = min3(n, bvecs[ix].bv_len - skip, max_size); + span += len; + nsegs++; + ix++; + if (span >= max_size || nsegs >= max_segs) + break; + skip = 0; + n -= len; + } + + return min(span, max_size); +} + +/* + * Select the span of an xarray iterator we're going to use. Limit it by both + * maximum size and maximum number of segments. It is assumed that segments + * can be larger than a page in size, provided they're physically contiguous. + * Returns the size of the span in bytes. 
+ */ +static size_t netfs_limit_xarray(const struct iov_iter *iter, size_t start_offset, + size_t max_size, size_t max_segs) +{ + struct folio *folio; + unsigned int nsegs = 0; + loff_t pos = iter->xarray_start + iter->iov_offset; + pgoff_t index = pos / PAGE_SIZE; + size_t span = 0, n = iter->count; + + XA_STATE(xas, iter->xarray, index); + + if (WARN_ON(!iov_iter_is_xarray(iter)) || + WARN_ON(start_offset > n) || + n == 0) + return 0; + max_size = min(max_size, n - start_offset); + + rcu_read_lock(); + xas_for_each(&xas, folio, ULONG_MAX) { + size_t offset, flen, len; + if (xas_retry(&xas, folio)) + continue; + if (WARN_ON(xa_is_value(folio))) + break; + if (WARN_ON(folio_test_hugetlb(folio))) + break; + + flen = folio_size(folio); + offset = offset_in_folio(folio, pos); + len = min(max_size, flen - offset); + span += len; + nsegs++; + if (span >= max_size || nsegs >= max_segs) + break; + } + + rcu_read_unlock(); + return min(span, max_size); +} + +size_t netfs_limit_iter(const struct iov_iter *iter, size_t start_offset, + size_t max_size, size_t max_segs) +{ + if (iov_iter_is_bvec(iter)) + return netfs_limit_bvec(iter, start_offset, max_size, max_segs); + if (iov_iter_is_xarray(iter)) + return netfs_limit_xarray(iter, start_offset, max_size, max_segs); + BUG(); +} +EXPORT_SYMBOL(netfs_limit_iter); diff --git a/fs/netfs/locking.c b/fs/netfs/locking.c new file mode 100644 index 0000000000000000000000000000000000000000..75dc52a49b3a4646d907cd1da3b0ddcd63374201 --- /dev/null +++ b/fs/netfs/locking.c @@ -0,0 +1,216 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * I/O and data path helper functionality. + * + * Borrowed from NFS Copyright (c) 2016 Trond Myklebust + */ + +#include +#include +#include "internal.h" + +/* + * inode_dio_wait_interruptible - wait for outstanding DIO requests to finish + * @inode: inode to wait for + * + * Waits for all pending direct I/O requests to finish so that we can + * proceed with a truncate or equivalent operation. 
+ * + * Must be called under a lock that serializes taking new references + * to i_dio_count, usually by inode->i_mutex. + */ +static int inode_dio_wait_interruptible(struct inode *inode) +{ + if (!atomic_read(&inode->i_dio_count)) + return 0; + + wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP); + DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP); + + for (;;) { + prepare_to_wait(wq, &q.wq_entry, TASK_INTERRUPTIBLE); + if (!atomic_read(&inode->i_dio_count)) + break; + if (signal_pending(current)) + break; + schedule(); + } + finish_wait(wq, &q.wq_entry); + + return atomic_read(&inode->i_dio_count) ? -ERESTARTSYS : 0; +} + +/* Call with exclusively locked inode->i_rwsem */ +static int netfs_block_o_direct(struct netfs_inode *ictx) +{ + if (!test_bit(NETFS_ICTX_ODIRECT, &ictx->flags)) + return 0; + clear_bit(NETFS_ICTX_ODIRECT, &ictx->flags); + return inode_dio_wait_interruptible(&ictx->inode); +} + +/** + * netfs_start_io_read - declare the file is being used for buffered reads + * @inode: file inode + * + * Declare that a buffered read operation is about to start, and ensure + * that we block all direct I/O. + * On exit, the function ensures that the NETFS_ICTX_ODIRECT flag is unset, + * and holds a shared lock on inode->i_rwsem to ensure that the flag + * cannot be changed. + * In practice, this means that buffered read operations are allowed to + * execute in parallel, thanks to the shared lock, whereas direct I/O + * operations need to wait to grab an exclusive lock in order to set + * NETFS_ICTX_ODIRECT. + * Note that buffered writes and truncates both take a write lock on + * inode->i_rwsem, meaning that those are serialised w.r.t. the reads. + */ +int netfs_start_io_read(struct inode *inode) + __acquires(inode->i_rwsem) +{ + struct netfs_inode *ictx = netfs_inode(inode); + + /* Be an optimist! 
+ */ + if (down_read_interruptible(&inode->i_rwsem) < 0) + return -ERESTARTSYS; + if (test_bit(NETFS_ICTX_ODIRECT, &ictx->flags) == 0) + return 0; + up_read(&inode->i_rwsem); + + /* Slow path.... */ + if (down_write_killable(&inode->i_rwsem) < 0) + return -ERESTARTSYS; + if (netfs_block_o_direct(ictx) < 0) { + up_write(&inode->i_rwsem); + return -ERESTARTSYS; + } + downgrade_write(&inode->i_rwsem); + return 0; +} +EXPORT_SYMBOL(netfs_start_io_read); + +/** + * netfs_end_io_read - declare that the buffered read operation is done + * @inode: file inode + * + * Declare that a buffered read operation is done, and release the shared + * lock on inode->i_rwsem. + */ +void netfs_end_io_read(struct inode *inode) + __releases(inode->i_rwsem) +{ + up_read(&inode->i_rwsem); +} +EXPORT_SYMBOL(netfs_end_io_read); + +/** + * netfs_start_io_write - declare the file is being used for buffered writes + * @inode: file inode + * + * Declare that a buffered write operation is about to start, and ensure + * that we block all direct I/O. + */ +int netfs_start_io_write(struct inode *inode) + __acquires(inode->i_rwsem) +{ + struct netfs_inode *ictx = netfs_inode(inode); + + if (down_write_killable(&inode->i_rwsem) < 0) + return -ERESTARTSYS; + if (netfs_block_o_direct(ictx) < 0) { + up_write(&inode->i_rwsem); + return -ERESTARTSYS; + } + return 0; +} +EXPORT_SYMBOL(netfs_start_io_write); + +/** + * netfs_end_io_write - declare that the buffered write operation is done + * @inode: file inode + * + * Declare that a buffered write operation is done, and release the
+ */ +void netfs_end_io_write(struct inode *inode) + __releases(inode->i_rwsem) +{ + up_write(&inode->i_rwsem); +} +EXPORT_SYMBOL(netfs_end_io_write); + +/* Call with exclusively locked inode->i_rwsem */ +static int netfs_block_buffered(struct inode *inode) +{ + struct netfs_inode *ictx = netfs_inode(inode); + int ret; + + if (!test_bit(NETFS_ICTX_ODIRECT, &ictx->flags)) { + set_bit(NETFS_ICTX_ODIRECT, &ictx->flags); + if (inode->i_mapping->nrpages != 0) { + unmap_mapping_range(inode->i_mapping, 0, 0, 0); + ret = filemap_fdatawait(inode->i_mapping); + if (ret < 0) { + clear_bit(NETFS_ICTX_ODIRECT, &ictx->flags); + return ret; + } + } + } + return 0; +} + +/** + * netfs_start_io_direct - declare the file is being used for direct i/o + * @inode: file inode + * + * Declare that a direct I/O operation is about to start, and ensure + * that we block all buffered I/O. + * On exit, the function ensures that the NETFS_ICTX_ODIRECT flag is set, + * and holds a shared lock on inode->i_rwsem to ensure that the flag + * cannot be changed. + * In practice, this means that direct I/O operations are allowed to + * execute in parallel, thanks to the shared lock, whereas buffered I/O + * operations need to wait to grab an exclusive lock in order to clear + * NETFS_ICTX_ODIRECT. + * Note that buffered writes and truncates both take a write lock on + * inode->i_rwsem, meaning that those are serialised w.r.t. O_DIRECT. + */ +int netfs_start_io_direct(struct inode *inode) + __acquires(inode->i_rwsem) +{ + struct netfs_inode *ictx = netfs_inode(inode); + int ret; + + /* Be an optimist! */ + if (down_read_interruptible(&inode->i_rwsem) < 0) + return -ERESTARTSYS; + if (test_bit(NETFS_ICTX_ODIRECT, &ictx->flags) != 0) + return 0; + up_read(&inode->i_rwsem); + + /* Slow path.... 
*/ + if (down_write_killable(&inode->i_rwsem) < 0) + return -ERESTARTSYS; + ret = netfs_block_buffered(inode); + if (ret < 0) { + up_write(&inode->i_rwsem); + return ret; + } + downgrade_write(&inode->i_rwsem); + return 0; +} +EXPORT_SYMBOL(netfs_start_io_direct); + +/** + * netfs_end_io_direct - declare that the direct i/o operation is done + * @inode: file inode + * + * Declare that a direct I/O operation is done, and release the shared + * lock on inode->i_rwsem. + */ +void netfs_end_io_direct(struct inode *inode) + __releases(inode->i_rwsem) +{ + up_read(&inode->i_rwsem); +} +EXPORT_SYMBOL(netfs_end_io_direct); diff --git a/fs/netfs/main.c b/fs/netfs/main.c index 068568702957e867d539b210e136aa2e66ad3746..5e77618a79409c253ab21aa51c186a07f691f356 100644 --- a/fs/netfs/main.c +++ b/fs/netfs/main.c @@ -7,6 +7,8 @@ #include #include +#include +#include #include "internal.h" #define CREATE_TRACE_POINTS #include @@ -15,6 +17,113 @@ MODULE_DESCRIPTION("Network fs support"); MODULE_AUTHOR("Red Hat, Inc."); MODULE_LICENSE("GPL"); +EXPORT_TRACEPOINT_SYMBOL(netfs_sreq); + unsigned netfs_debug; module_param_named(debug, netfs_debug, uint, S_IWUSR | S_IRUGO); MODULE_PARM_DESC(netfs_debug, "Netfs support debugging mask"); + +#ifdef CONFIG_PROC_FS +LIST_HEAD(netfs_io_requests); +DEFINE_SPINLOCK(netfs_proc_lock); + +static const char *netfs_origins[nr__netfs_io_origin] = { + [NETFS_READAHEAD] = "RA", + [NETFS_READPAGE] = "RP", + [NETFS_READ_FOR_WRITE] = "RW", + [NETFS_WRITEBACK] = "WB", + [NETFS_WRITETHROUGH] = "WT", + [NETFS_LAUNDER_WRITE] = "LW", + [NETFS_UNBUFFERED_WRITE] = "UW", + [NETFS_DIO_READ] = "DR", + [NETFS_DIO_WRITE] = "DW", +}; + +/* + * Generate a list of I/O requests in /proc/fs/netfs/requests + */ +static int netfs_requests_seq_show(struct seq_file *m, void *v) +{ + struct netfs_io_request *rreq; + + if (v == &netfs_io_requests) { + seq_puts(m, + "REQUEST OR REF FL ERR OPS COVERAGE\n" + "======== == === == ==== === =========\n" + ); + return 0; + } + + rreq = 
list_entry(v, struct netfs_io_request, proc_link); + seq_printf(m, + "%08x %s %3d %2lx %4d %3d @%04llx %zx/%zx", + rreq->debug_id, + netfs_origins[rreq->origin], + refcount_read(&rreq->ref), + rreq->flags, + rreq->error, + atomic_read(&rreq->nr_outstanding), + rreq->start, rreq->submitted, rreq->len); + seq_putc(m, '\n'); + return 0; +} + +static void *netfs_requests_seq_start(struct seq_file *m, loff_t *_pos) + __acquires(rcu) +{ + rcu_read_lock(); + return seq_list_start_head(&netfs_io_requests, *_pos); +} + +static void *netfs_requests_seq_next(struct seq_file *m, void *v, loff_t *_pos) +{ + return seq_list_next(v, &netfs_io_requests, _pos); +} + +static void netfs_requests_seq_stop(struct seq_file *m, void *v) + __releases(rcu) +{ + rcu_read_unlock(); +} + +static const struct seq_operations netfs_requests_seq_ops = { + .start = netfs_requests_seq_start, + .next = netfs_requests_seq_next, + .stop = netfs_requests_seq_stop, + .show = netfs_requests_seq_show, +}; +#endif /* CONFIG_PROC_FS */ + +static int __init netfs_init(void) +{ + int ret = -ENOMEM; + + if (!proc_mkdir("fs/netfs", NULL)) + goto error; + if (!proc_create_seq("fs/netfs/requests", S_IFREG | 0444, NULL, + &netfs_requests_seq_ops)) + goto error_proc; +#ifdef CONFIG_FSCACHE_STATS + if (!proc_create_single("fs/netfs/stats", S_IFREG | 0444, NULL, + netfs_stats_show)) + goto error_proc; +#endif + + ret = fscache_init(); + if (ret < 0) + goto error_proc; + return 0; + +error_proc: + remove_proc_entry("fs/netfs", NULL); +error: + return ret; +} +fs_initcall(netfs_init); + +static void __exit netfs_exit(void) +{ + fscache_exit(); + remove_proc_entry("fs/netfs", NULL); +} +module_exit(netfs_exit); diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c new file mode 100644 index 0000000000000000000000000000000000000000..0e3af37fc9243f7a0d351840904aa0ce5d91ee59 --- /dev/null +++ b/fs/netfs/misc.c @@ -0,0 +1,260 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Miscellaneous routines. 
+ * + * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include +#include "internal.h" + +/* + * Attach a folio to the buffer and maybe set marks on it to say that we need + * to put the folio later and twiddle the pagecache flags. + */ +int netfs_xa_store_and_mark(struct xarray *xa, unsigned long index, + struct folio *folio, unsigned int flags, + gfp_t gfp_mask) +{ + XA_STATE_ORDER(xas, xa, index, folio_order(folio)); + +retry: + xas_lock(&xas); + for (;;) { + xas_store(&xas, folio); + if (!xas_error(&xas)) + break; + xas_unlock(&xas); + if (!xas_nomem(&xas, gfp_mask)) + return xas_error(&xas); + goto retry; + } + + if (flags & NETFS_FLAG_PUT_MARK) + xas_set_mark(&xas, NETFS_BUF_PUT_MARK); + if (flags & NETFS_FLAG_PAGECACHE_MARK) + xas_set_mark(&xas, NETFS_BUF_PAGECACHE_MARK); + xas_unlock(&xas); + return xas_error(&xas); +} + +/* + * Create the specified range of folios in the buffer attached to the read + * request. The folios are marked with NETFS_BUF_PUT_MARK so that we know that + * these need freeing later. + */ +int netfs_add_folios_to_buffer(struct xarray *buffer, + struct address_space *mapping, + pgoff_t index, pgoff_t to, gfp_t gfp_mask) +{ + struct folio *folio; + int ret; + + if (to + 1 == index) /* Page range is inclusive */ + return 0; + + do { + /* TODO: Figure out what order folio can be allocated here */ + folio = filemap_alloc_folio(readahead_gfp_mask(mapping), 0); + if (!folio) + return -ENOMEM; + folio->index = index; + ret = netfs_xa_store_and_mark(buffer, index, folio, + NETFS_FLAG_PUT_MARK, gfp_mask); + if (ret < 0) { + folio_put(folio); + return ret; + } + + index += folio_nr_pages(folio); + } while (index <= to && index != 0); + + return 0; +} + +/* + * Clear an xarray buffer, putting a ref on the folios that have + * NETFS_BUF_PUT_MARK set. 
+ */ +void netfs_clear_buffer(struct xarray *buffer) +{ + struct folio *folio; + XA_STATE(xas, buffer, 0); + + rcu_read_lock(); + xas_for_each_marked(&xas, folio, ULONG_MAX, NETFS_BUF_PUT_MARK) { + folio_put(folio); + } + rcu_read_unlock(); + xa_destroy(buffer); +} + +/** + * netfs_dirty_folio - Mark folio dirty and pin a cache object for writeback + * @mapping: The mapping the folio belongs to. + * @folio: The folio being dirtied. + * + * Set the dirty flag on a folio and pin an in-use cache object in memory so + * that writeback can later write to it. This is intended to be called from + * the filesystem's ->dirty_folio() method. + * + * Return: true if the dirty flag was set on the folio, false otherwise. + */ +bool netfs_dirty_folio(struct address_space *mapping, struct folio *folio) +{ + struct inode *inode = mapping->host; + struct netfs_inode *ictx = netfs_inode(inode); + struct fscache_cookie *cookie = netfs_i_cookie(ictx); + bool need_use = false; + + _enter(""); + + if (!filemap_dirty_folio(mapping, folio)) + return false; + if (!fscache_cookie_valid(cookie)) + return true; + + if (!(inode->i_state & I_PINNING_NETFS_WB)) { + spin_lock(&inode->i_lock); + if (!(inode->i_state & I_PINNING_NETFS_WB)) { + inode->i_state |= I_PINNING_NETFS_WB; + need_use = true; + } + spin_unlock(&inode->i_lock); + + if (need_use) + fscache_use_cookie(cookie, true); + } + return true; +} +EXPORT_SYMBOL(netfs_dirty_folio); + +/** + * netfs_unpin_writeback - Unpin writeback resources + * @inode: The inode on which the cookie resides + * @wbc: The writeback control + * + * Unpin the writeback resources pinned by netfs_dirty_folio(). This is + * intended to be called as/by the netfs's ->write_inode() method. 
+ */ +int netfs_unpin_writeback(struct inode *inode, struct writeback_control *wbc) +{ + struct fscache_cookie *cookie = netfs_i_cookie(netfs_inode(inode)); + + if (wbc->unpinned_netfs_wb) + fscache_unuse_cookie(cookie, NULL, NULL); + return 0; +} +EXPORT_SYMBOL(netfs_unpin_writeback); + +/** + * netfs_clear_inode_writeback - Clear writeback resources pinned by an inode + * @inode: The inode to clean up + * @aux: Auxiliary data to apply to the inode + * + * Clear any writeback resources held by an inode when the inode is evicted. + * This must be called before clear_inode() is called. + */ +void netfs_clear_inode_writeback(struct inode *inode, const void *aux) +{ + struct fscache_cookie *cookie = netfs_i_cookie(netfs_inode(inode)); + + if (inode->i_state & I_PINNING_NETFS_WB) { + loff_t i_size = i_size_read(inode); + fscache_unuse_cookie(cookie, aux, &i_size); + } +} +EXPORT_SYMBOL(netfs_clear_inode_writeback); + +/** + * netfs_invalidate_folio - Invalidate or partially invalidate a folio + * @folio: Folio to be invalidated + * @offset: Offset of the invalidated region + * @length: Length of the invalidated region + * + * Invalidate part or all of a folio for a network filesystem. The folio will + * be removed afterwards if the invalidated region covers the entire folio. + */ +void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length) +{ + struct netfs_folio *finfo = NULL; + size_t flen = folio_size(folio); + + _enter("{%lx},%zx,%zx", folio_index(folio), offset, length); + + folio_wait_fscache(folio); + + if (!folio_test_private(folio)) + return; + + finfo = netfs_folio_info(folio); + + if (offset == 0 && length >= flen) + goto erase_completely; + + if (finfo) { + /* We have a partially uptodate page from a streaming write.
*/ + unsigned int fstart = finfo->dirty_offset; + unsigned int fend = fstart + finfo->dirty_len; + unsigned int end = offset + length; + + if (offset >= fend) + return; + if (end <= fstart) + return; + if (offset <= fstart && end >= fend) + goto erase_completely; + if (offset <= fstart && end > fstart) + goto reduce_len; + if (offset > fstart && end >= fend) + goto move_start; + /* A partial write was split. The caller has already zeroed + * it, so just absorb the hole. + */ + } + return; + +erase_completely: + netfs_put_group(netfs_folio_group(folio)); + folio_detach_private(folio); + folio_clear_uptodate(folio); + kfree(finfo); + return; +reduce_len: + finfo->dirty_len = offset + length - finfo->dirty_offset; + return; +move_start: + finfo->dirty_len -= offset - finfo->dirty_offset; + finfo->dirty_offset = offset; +} +EXPORT_SYMBOL(netfs_invalidate_folio); + +/** + * netfs_release_folio - Try to release a folio + * @folio: Folio proposed for release + * @gfp: Flags qualifying the release + * + * Request release of a folio and clean up its private state if it's not busy. 
+ * Returns true if the folio can now be released, false if not + */ +bool netfs_release_folio(struct folio *folio, gfp_t gfp) +{ + struct netfs_inode *ctx = netfs_inode(folio_inode(folio)); + unsigned long long end; + + end = folio_pos(folio) + folio_size(folio); + if (end > ctx->zero_point) + ctx->zero_point = end; + + if (folio_test_private(folio)) + return false; + if (folio_test_fscache(folio)) { + if (current_is_kswapd() || !(gfp & __GFP_FS)) + return false; + folio_wait_fscache(folio); + } + + fscache_note_page_release(netfs_i_cookie(ctx)); + return true; +} +EXPORT_SYMBOL(netfs_release_folio); diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c index e17cdf53f6a7883a3459c47d5695554e516f4c51..610ceb5bd86c08ba7c61905d07d19092940f44ae 100644 --- a/fs/netfs/objects.c +++ b/fs/netfs/objects.c @@ -20,14 +20,20 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping, struct inode *inode = file ? file_inode(file) : mapping->host; struct netfs_inode *ctx = netfs_inode(inode); struct netfs_io_request *rreq; + bool is_unbuffered = (origin == NETFS_UNBUFFERED_WRITE || + origin == NETFS_DIO_READ || + origin == NETFS_DIO_WRITE); + bool cached = !is_unbuffered && netfs_is_cache_enabled(ctx); int ret; - rreq = kzalloc(sizeof(struct netfs_io_request), GFP_KERNEL); + rreq = kzalloc(ctx->ops->io_request_size ?: sizeof(struct netfs_io_request), + GFP_KERNEL); if (!rreq) return ERR_PTR(-ENOMEM); rreq->start = start; rreq->len = len; + rreq->upper_len = len; rreq->origin = origin; rreq->netfs_ops = ctx->ops; rreq->mapping = mapping; @@ -35,8 +41,14 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping, rreq->i_size = i_size_read(inode); rreq->debug_id = atomic_inc_return(&debug_ids); INIT_LIST_HEAD(&rreq->subrequests); + INIT_WORK(&rreq->work, NULL); refcount_set(&rreq->ref, 1); + __set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags); + if (cached) + __set_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq->flags); + if (file && file->f_flags & 
O_NONBLOCK) + __set_bit(NETFS_RREQ_NONBLOCK, &rreq->flags); if (rreq->netfs_ops->init_request) { ret = rreq->netfs_ops->init_request(rreq, file); if (ret < 0) { @@ -45,6 +57,8 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping, } } + trace_netfs_rreq_ref(rreq->debug_id, 1, netfs_rreq_trace_new); + netfs_proc_add_rreq(rreq); netfs_stat(&netfs_n_rh_rreq); return rreq; } @@ -74,33 +88,47 @@ static void netfs_free_request(struct work_struct *work) { struct netfs_io_request *rreq = container_of(work, struct netfs_io_request, work); + unsigned int i; trace_netfs_rreq(rreq, netfs_rreq_trace_free); + netfs_proc_del_rreq(rreq); netfs_clear_subrequests(rreq, false); if (rreq->netfs_ops->free_request) rreq->netfs_ops->free_request(rreq); if (rreq->cache_resources.ops) rreq->cache_resources.ops->end_operation(&rreq->cache_resources); - kfree(rreq); + if (rreq->direct_bv) { + for (i = 0; i < rreq->direct_bv_count; i++) { + if (rreq->direct_bv[i].bv_page) { + if (rreq->direct_bv_unpin) + unpin_user_page(rreq->direct_bv[i].bv_page); + } + } + kvfree(rreq->direct_bv); + } + kfree_rcu(rreq, rcu); netfs_stat_d(&netfs_n_rh_rreq); } void netfs_put_request(struct netfs_io_request *rreq, bool was_async, enum netfs_rreq_ref_trace what) { - unsigned int debug_id = rreq->debug_id; + unsigned int debug_id; bool dead; int r; - dead = __refcount_dec_and_test(&rreq->ref, &r); - trace_netfs_rreq_ref(debug_id, r - 1, what); - if (dead) { - if (was_async) { - rreq->work.func = netfs_free_request; - if (!queue_work(system_unbound_wq, &rreq->work)) - BUG(); - } else { - netfs_free_request(&rreq->work); + if (rreq) { + debug_id = rreq->debug_id; + dead = __refcount_dec_and_test(&rreq->ref, &r); + trace_netfs_rreq_ref(debug_id, r - 1, what); + if (dead) { + if (was_async) { + rreq->work.func = netfs_free_request; + if (!queue_work(system_unbound_wq, &rreq->work)) + BUG(); + } else { + netfs_free_request(&rreq->work); + } } } } @@ -112,8 +140,11 @@ struct netfs_io_subrequest 
*netfs_alloc_subrequest(struct netfs_io_request *rreq { struct netfs_io_subrequest *subreq; - subreq = kzalloc(sizeof(struct netfs_io_subrequest), GFP_KERNEL); + subreq = kzalloc(rreq->netfs_ops->io_subrequest_size ?: + sizeof(struct netfs_io_subrequest), + GFP_KERNEL); if (subreq) { + INIT_WORK(&subreq->work, NULL); INIT_LIST_HEAD(&subreq->rreq_link); refcount_set(&subreq->ref, 2); subreq->rreq = rreq; @@ -140,6 +171,8 @@ static void netfs_free_subrequest(struct netfs_io_subrequest *subreq, struct netfs_io_request *rreq = subreq->rreq; trace_netfs_sreq(subreq, netfs_sreq_trace_free); + if (rreq->netfs_ops->free_subrequest) + rreq->netfs_ops->free_subrequest(subreq); kfree(subreq); netfs_stat_d(&netfs_n_rh_sreq); netfs_put_request(rreq, was_async, netfs_rreq_trace_put_subreq); diff --git a/fs/netfs/output.c b/fs/netfs/output.c new file mode 100644 index 0000000000000000000000000000000000000000..625eb68f3e5ad50f89848e197ac011cae515fe56 --- /dev/null +++ b/fs/netfs/output.c @@ -0,0 +1,478 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Network filesystem high-level write support. + * + * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include +#include +#include +#include +#include +#include +#include "internal.h" + +/** + * netfs_create_write_request - Create a write operation. + * @wreq: The write request this is storing from. + * @dest: The destination type + * @start: Start of the region this write will modify + * @len: Length of the modification + * @worker: The worker function to handle the write(s) + * + * Allocate a write operation, set it up and add it to the list on a write + * request. 
+ */
+struct netfs_io_subrequest *netfs_create_write_request(struct netfs_io_request *wreq,
+						       enum netfs_io_source dest,
+						       loff_t start, size_t len,
+						       work_func_t worker)
+{
+	struct netfs_io_subrequest *subreq;
+
+	subreq = netfs_alloc_subrequest(wreq);
+	if (subreq) {
+		INIT_WORK(&subreq->work, worker);
+		subreq->source = dest;
+		subreq->start = start;
+		subreq->len = len;
+		subreq->debug_index = wreq->subreq_counter++;
+
+		switch (subreq->source) {
+		case NETFS_UPLOAD_TO_SERVER:
+			netfs_stat(&netfs_n_wh_upload);
+			break;
+		case NETFS_WRITE_TO_CACHE:
+			netfs_stat(&netfs_n_wh_write);
+			break;
+		default:
+			BUG();
+		}
+
+		/* Give the subrequest its own view of the request's iterator,
+		 * narrowed to [start, start + len).
+		 */
+		subreq->io_iter = wreq->io_iter;
+		iov_iter_advance(&subreq->io_iter, subreq->start - wreq->start);
+		iov_iter_truncate(&subreq->io_iter, subreq->len);
+
+		trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index,
+				     refcount_read(&subreq->ref),
+				     netfs_sreq_trace_new);
+		atomic_inc(&wreq->nr_outstanding);
+		list_add_tail(&subreq->rreq_link, &wreq->subrequests);
+		trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
+	}
+
+	return subreq;
+}
+EXPORT_SYMBOL(netfs_create_write_request);
+
+/*
+ * Process a completed write request once all the component operations have
+ * been completed.
+ *
+ * This totals up the data transferred, folds subrequest errors into the
+ * request, calls the request's ->cleanup(), clears NETFS_RREQ_IN_PROGRESS and
+ * wakes anyone waiting on it, completes the iocb if there is one and finally
+ * drops the subrequests and a ref on the request.
+ */
+static void netfs_write_terminated(struct netfs_io_request *wreq, bool was_async)
+{
+	struct netfs_io_subrequest *subreq;
+	struct netfs_inode *ctx = netfs_inode(wreq->inode);
+	size_t transferred = 0;
+
+	_enter("R=%x[]", wreq->debug_id);
+
+	trace_netfs_rreq(wreq, netfs_rreq_trace_write_done);
+
+	/* Count the data written contiguously from the start of the request,
+	 * stopping at the first subrequest that failed, transferred nothing
+	 * or came up short.
+	 */
+	list_for_each_entry(subreq, &wreq->subrequests, rreq_link) {
+		if (subreq->error || subreq->transferred == 0)
+			break;
+		transferred += subreq->transferred;
+		if (subreq->transferred < subreq->len)
+			break;
+	}
+	wreq->transferred = transferred;
+
+	list_for_each_entry(subreq, &wreq->subrequests, rreq_link) {
+		if (!subreq->error)
+			continue;
+		switch (subreq->source) {
+		case NETFS_UPLOAD_TO_SERVER:
+			/* Depending on the type of failure, this may prevent
+			 * writeback completion unless we're in disconnected
+			 * mode.
+			 */
+			if (!wreq->error)
+				wreq->error = subreq->error;
+			break;
+
+		case NETFS_WRITE_TO_CACHE:
+			/* Failure doesn't prevent writeback completion unless
+			 * we're in disconnected mode.
+			 */
+			if (subreq->error != -ENOBUFS)
+				ctx->ops->invalidate_cache(wreq);
+			break;
+
+		default:
+			/* Unexpected destination. Record an error but carry
+			 * on: returning from here would skip the cleanup,
+			 * leave NETFS_RREQ_IN_PROGRESS set with waiters never
+			 * woken, and never put the ref on the request.
+			 */
+			WARN_ON_ONCE(1);
+			if (!wreq->error)
+				wreq->error = -EIO;
+			break;
+		}
+	}
+
+	wreq->cleanup(wreq);
+
+	if (wreq->origin == NETFS_DIO_WRITE &&
+	    wreq->mapping->nrpages) {
+		pgoff_t first = wreq->start >> PAGE_SHIFT;
+		pgoff_t last = (wreq->start + wreq->transferred - 1) >> PAGE_SHIFT;
+		invalidate_inode_pages2_range(wreq->mapping, first, last);
+	}
+
+	if (wreq->origin == NETFS_DIO_WRITE)
+		inode_dio_end(wreq->inode);
+
+	_debug("finished");
+	trace_netfs_rreq(wreq, netfs_rreq_trace_wake_ip);
+	clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &wreq->flags);
+	wake_up_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS);
+
+	if (wreq->iocb) {
+		wreq->iocb->ki_pos += transferred;
+		if (wreq->iocb->ki_complete)
+			wreq->iocb->ki_complete(
+				wreq->iocb, wreq->error ? wreq->error : transferred);
+	}
+
+	netfs_clear_subrequests(wreq, was_async);
+	netfs_put_request(wreq, was_async, netfs_rreq_trace_put_complete);
+}
+
+/*
+ * Deal with the completion of a write subrequest, whether it was an upload to
+ * the server or a write to the cache.
+ *
+ * @_op is the subrequest. @transferred_or_error is either the amount of data
+ * written or a negative error code. The last subrequest to complete (the one
+ * that takes nr_outstanding to 0) finishes off the whole request.
+ */
+void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error,
+				       bool was_async)
+{
+	struct netfs_io_subrequest *subreq = _op;
+	struct netfs_io_request *wreq = subreq->rreq;
+	unsigned int u;
+
+	_enter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error);
+
+	switch (subreq->source) {
+	case NETFS_UPLOAD_TO_SERVER:
+		netfs_stat(&netfs_n_wh_upload_done);
+		break;
+	case NETFS_WRITE_TO_CACHE:
+		netfs_stat(&netfs_n_wh_write_done);
+		break;
+	case NETFS_INVALID_WRITE:
+		break;
+	default:
+		BUG();
+	}
+
+	if (IS_ERR_VALUE(transferred_or_error)) {
+		subreq->error = transferred_or_error;
+		trace_netfs_failure(wreq, subreq, transferred_or_error,
+				    netfs_fail_write);
+		goto failed;
+	}
+
+	/* Clamp a claim of having written more than was left to write. */
+	if (WARN(transferred_or_error > subreq->len - subreq->transferred,
+		 "Subreq excess write: R%x[%x] %zd > %zu - %zu",
+		 wreq->debug_id, subreq->debug_index,
+		 transferred_or_error, subreq->len, subreq->transferred))
+		transferred_or_error = subreq->len - subreq->transferred;
+
+	subreq->error = 0;
+	subreq->transferred += transferred_or_error;
+
+	if (iov_iter_count(&subreq->io_iter) != subreq->len - subreq->transferred)
+		pr_warn("R=%08x[%u] ITER POST-MISMATCH %zx != %zx-%zx %x\n",
+			wreq->debug_id, subreq->debug_index,
+			iov_iter_count(&subreq->io_iter), subreq->len,
+			subreq->transferred, subreq->io_iter.iter_type);
+
+	if (subreq->transferred < subreq->len)
+		goto incomplete;
+
+	__clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
+out:
+	trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);
+
+	/* If we decrement nr_outstanding to 0, the ref belongs to us. */
+	u = atomic_dec_return(&wreq->nr_outstanding);
+	if (u == 0)
+		netfs_write_terminated(wreq, was_async);
+	else if (u == 1)
+		wake_up_var(&wreq->nr_outstanding);
+
+	netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated);
+	return;
+
+incomplete:
+	/* A short write. Two zero-length results in a row are treated as a
+	 * failure (-ENODATA); any progress clears the no-progress marker.
+	 */
+	if (transferred_or_error == 0) {
+		if (__test_and_set_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags)) {
+			subreq->error = -ENODATA;
+			goto failed;
+		}
+	} else {
+		__clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
+	}
+
+	__set_bit(NETFS_SREQ_SHORT_IO, &subreq->flags);
+	set_bit(NETFS_RREQ_INCOMPLETE_IO, &wreq->flags);
+	goto out;
+
+failed:
+	switch (subreq->source) {
+	case NETFS_WRITE_TO_CACHE:
+		netfs_stat(&netfs_n_wh_write_failed);
+		set_bit(NETFS_RREQ_INCOMPLETE_IO, &wreq->flags);
+		break;
+	case NETFS_UPLOAD_TO_SERVER:
+		netfs_stat(&netfs_n_wh_upload_failed);
+		set_bit(NETFS_RREQ_FAILED, &wreq->flags);
+		wreq->error = subreq->error;
+		break;
+	default:
+		break;
+	}
+	goto out;
+}
+EXPORT_SYMBOL(netfs_write_subrequest_terminated);
+
+/*
+ * Hand a cache-write subrequest to the cache backend; completion is reported
+ * through netfs_write_subrequest_terminated().
+ */
+static void netfs_write_to_cache_op(struct netfs_io_subrequest *subreq)
+{
+	struct netfs_io_request *wreq = subreq->rreq;
+	struct netfs_cache_resources *cres = &wreq->cache_resources;
+
+	trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
+
+	cres->ops->write(cres, subreq->start, &subreq->io_iter,
+			 netfs_write_subrequest_terminated, subreq);
+}
+
+/*
+ * Workqueue entry point for netfs_write_to_cache_op().
+ */
+static void netfs_write_to_cache_op_worker(struct work_struct *work)
+{
+	struct netfs_io_subrequest *subreq =
+		container_of(work, struct netfs_io_subrequest, work);
+
+	netfs_write_to_cache_op(subreq);
+}
+
+/**
+ * netfs_queue_write_request - Queue a write request for attention
+ * @subreq: The write request to be queued
+ *
+ * Queue the specified write request for processing by a worker thread. We
+ * pass the caller's ref on the request to the worker thread.
+ */
+void netfs_queue_write_request(struct netfs_io_subrequest *subreq)
+{
+	if (!queue_work(system_unbound_wq, &subreq->work))
+		netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_wip);
+}
+EXPORT_SYMBOL(netfs_queue_write_request);
+
+/*
+ * Set up an op for writing to the cache.
+ *
+ * No subrequest is created if the cache cookie is not enabled (in which case
+ * NETFS_RREQ_WRITE_TO_CACHE is also cleared), if the cache write operation
+ * cannot be begun, if ->prepare_write() fails or if the subrequest cannot be
+ * allocated. Otherwise the write is issued directly from here.
+ */
+static void netfs_set_up_write_to_cache(struct netfs_io_request *wreq)
+{
+	struct netfs_cache_resources *cres = &wreq->cache_resources;
+	struct netfs_io_subrequest *subreq;
+	struct netfs_inode *ctx = netfs_inode(wreq->inode);
+	struct fscache_cookie *cookie = netfs_i_cookie(ctx);
+	loff_t start = wreq->start;
+	size_t len = wreq->len;
+	int ret;
+
+	if (!fscache_cookie_enabled(cookie)) {
+		clear_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags);
+		return;
+	}
+
+	_debug("write to cache");
+	ret = fscache_begin_write_operation(cres, cookie);
+	if (ret < 0)
+		return;
+
+	ret = cres->ops->prepare_write(cres, &start, &len, wreq->upper_len,
+				       i_size_read(wreq->inode), true);
+	if (ret < 0)
+		return;
+
+	subreq = netfs_create_write_request(wreq, NETFS_WRITE_TO_CACHE, start, len,
+					    netfs_write_to_cache_op_worker);
+	if (!subreq)
+		return;
+
+	netfs_write_to_cache_op(subreq);
+}
+
+/*
+ * Begin the process of writing out a chunk of data.
+ *
+ * We are given a write request that holds a series of dirty regions and
+ * (partially) covers a sequence of folios, all of which are present. The
+ * pages must have been marked as writeback as appropriate.
+ *
+ * We need to perform the following steps:
+ *
+ * (1) If encrypting, create an output buffer and encrypt each block of the
+ *     data into it, otherwise the output buffer will point to the original
+ *     folios.
+ *
+ * (2) If the data is to be cached, set up a write op for the entire output
+ *     buffer to the cache, if the cache wants to accept it.
+ *
+ * (3) If the data is to be uploaded (ie. not merely cached):
+ *
+ *     (a) If the data is to be compressed, create a compression buffer and
+ *         compress the data into it.
+ *
+ *     (b) For each destination we want to upload to, set up write ops to write
+ *         to that destination. We may need multiple writes if the data is not
+ *         contiguous or the span exceeds wsize for a server.
+ *
+ * Returns -EIO for a zero-sized request. If @may_wait is false, returns
+ * -EIOCBQUEUED once the writes have been set up; otherwise waits for
+ * NETFS_RREQ_IN_PROGRESS to be cleared and returns the request's error.
+ */
+int netfs_begin_write(struct netfs_io_request *wreq, bool may_wait,
+		      enum netfs_write_trace what)
+{
+	struct netfs_inode *ctx = netfs_inode(wreq->inode);
+
+	_enter("R=%x %llx-%llx f=%lx",
+	       wreq->debug_id, wreq->start, wreq->start + wreq->len - 1,
+	       wreq->flags);
+
+	trace_netfs_write(wreq, what);
+	if (wreq->len == 0 || wreq->iter.count == 0) {
+		pr_err("Zero-sized write [R=%x]\n", wreq->debug_id);
+		return -EIO;
+	}
+
+	if (wreq->origin == NETFS_DIO_WRITE)
+		inode_dio_begin(wreq->inode);
+
+	wreq->io_iter = wreq->iter;
+
+	/* ->outstanding > 0 carries a ref. The count starts at 1 so that the
+	 * request cannot be terminated until the final decrement below, after
+	 * all the subrequests have been set up.
+	 */
+	netfs_get_request(wreq, netfs_rreq_trace_get_for_outstanding);
+	atomic_set(&wreq->nr_outstanding, 1);
+
+	/* Start the encryption/compression going. We can do that in the
+	 * background whilst we generate a list of write ops that we want to
+	 * perform.
+	 */
+	// TODO: Encrypt or compress the region as appropriate
+
+	/* We need to write all of the region to the cache */
+	if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags))
+		netfs_set_up_write_to_cache(wreq);
+
+	/* However, we don't necessarily write all of the region to the server.
+	 * Caching of reads is being managed this way also.
+	 */
+	if (test_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags))
+		ctx->ops->create_write_requests(wreq, wreq->start, wreq->len);
+
+	if (atomic_dec_and_test(&wreq->nr_outstanding))
+		netfs_write_terminated(wreq, false);
+
+	if (!may_wait)
+		return -EIOCBQUEUED;
+
+	wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS,
+		    TASK_UNINTERRUPTIBLE);
+	return wreq->error;
+}
+
+/*
+ * Begin a write operation for writing through the pagecache.
+ *
+ * Allocates a NETFS_WRITETHROUGH request starting at the iocb's position,
+ * marks it for upload to the server and points its iterator at the mapping's
+ * pagecache with an initial count of zero. Returns the request or the
+ * error pointer from netfs_alloc_request().
+ */
+struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len)
+{
+	struct netfs_io_request *wreq;
+	struct file *file = iocb->ki_filp;
+
+	wreq = netfs_alloc_request(file->f_mapping, file, iocb->ki_pos, len,
+				   NETFS_WRITETHROUGH);
+	if (IS_ERR(wreq))
+		return wreq;
+
+	trace_netfs_write(wreq, netfs_write_trace_writethrough);
+
+	__set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
+	iov_iter_xarray(&wreq->iter, ITER_SOURCE, &wreq->mapping->i_pages, wreq->start, 0);
+	wreq->io_iter = wreq->iter;
+
+	/* ->outstanding > 0 carries a ref */
+	netfs_get_request(wreq, netfs_rreq_trace_get_for_outstanding);
+	atomic_set(&wreq->nr_outstanding, 1);
+	return wreq;
+}
+
+static void netfs_submit_writethrough(struct netfs_io_request *wreq, bool final)
+{
+	struct netfs_inode *ictx = netfs_inode(wreq->inode);
+	unsigned long long start;
+	size_t len;
+
+	if (!test_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags))
+		return;
+
+	start = wreq->start + wreq->submitted;
+	len = wreq->iter.count - wreq->submitted;
+	if (!final) {
+		len /= wreq->wsize; /* Round to number of maximum packets; NOTE(review): assumes wsize is non-zero - confirm it is always set by the filesystem */
+		len *= wreq->wsize;
+	}
+
+	ictx->ops->create_write_requests(wreq, start, len);
+	wreq->submitted += len;
+}
+
+/*
+ * Advance the state of the write operation used when writing through the
+ * pagecache. Data has been copied into the pagecache that we need to append
+ * to the request. If we've added more than wsize then we need to create a new
+ * subrequest.
+ *
+ * Submission only happens when @to_page_end is set and at least wsize bytes
+ * are waiting to be submitted. Returns the request's current error.
+ */
+int netfs_advance_writethrough(struct netfs_io_request *wreq, size_t copied, bool to_page_end)
+{
+	_enter("ic=%zu sb=%zu ws=%u cp=%zu tp=%u",
+	       wreq->iter.count, wreq->submitted, wreq->wsize, copied, to_page_end);
+
+	wreq->iter.count += copied;
+	wreq->io_iter.count += copied;
+	if (to_page_end && wreq->io_iter.count - wreq->submitted >= wreq->wsize)
+		netfs_submit_writethrough(wreq, false);
+
+	return wreq->error;
+}
+
+/*
+ * End a write operation used when writing through the pagecache.
+ */ +int netfs_end_writethrough(struct netfs_io_request *wreq, struct kiocb *iocb) +{ + int ret = -EIOCBQUEUED; + + _enter("ic=%zu sb=%zu ws=%u", + wreq->iter.count, wreq->submitted, wreq->wsize); + + if (wreq->submitted < wreq->io_iter.count) + netfs_submit_writethrough(wreq, true); + + if (atomic_dec_and_test(&wreq->nr_outstanding)) + netfs_write_terminated(wreq, false); + + if (is_sync_kiocb(iocb)) { + wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS, + TASK_UNINTERRUPTIBLE); + ret = wreq->error; + } + + netfs_put_request(wreq, false, netfs_rreq_trace_put_return); + return ret; +} diff --git a/fs/netfs/stats.c b/fs/netfs/stats.c index 5510a7a14a40dda1a53d344399852d001252aaa0..deeba9f9dcf5d55f7bf0692ecdf5991334a848ea 100644 --- a/fs/netfs/stats.c +++ b/fs/netfs/stats.c @@ -9,6 +9,8 @@ #include #include "internal.h" +atomic_t netfs_n_rh_dio_read; +atomic_t netfs_n_rh_dio_write; atomic_t netfs_n_rh_readahead; atomic_t netfs_n_rh_readpage; atomic_t netfs_n_rh_rreq; @@ -27,32 +29,48 @@ atomic_t netfs_n_rh_write_begin; atomic_t netfs_n_rh_write_done; atomic_t netfs_n_rh_write_failed; atomic_t netfs_n_rh_write_zskip; +atomic_t netfs_n_wh_wstream_conflict; +atomic_t netfs_n_wh_upload; +atomic_t netfs_n_wh_upload_done; +atomic_t netfs_n_wh_upload_failed; +atomic_t netfs_n_wh_write; +atomic_t netfs_n_wh_write_done; +atomic_t netfs_n_wh_write_failed; -void netfs_stats_show(struct seq_file *m) +int netfs_stats_show(struct seq_file *m, void *v) { - seq_printf(m, "RdHelp : RA=%u RP=%u WB=%u WBZ=%u rr=%u sr=%u\n", + seq_printf(m, "Netfs : DR=%u DW=%u RA=%u RP=%u WB=%u WBZ=%u\n", + atomic_read(&netfs_n_rh_dio_read), + atomic_read(&netfs_n_rh_dio_write), atomic_read(&netfs_n_rh_readahead), atomic_read(&netfs_n_rh_readpage), atomic_read(&netfs_n_rh_write_begin), - atomic_read(&netfs_n_rh_write_zskip), - atomic_read(&netfs_n_rh_rreq), - atomic_read(&netfs_n_rh_sreq)); - seq_printf(m, "RdHelp : ZR=%u sh=%u sk=%u\n", + atomic_read(&netfs_n_rh_write_zskip)); + seq_printf(m, "Netfs : 
ZR=%u sh=%u sk=%u\n", atomic_read(&netfs_n_rh_zero), atomic_read(&netfs_n_rh_short_read), atomic_read(&netfs_n_rh_write_zskip)); - seq_printf(m, "RdHelp : DL=%u ds=%u df=%u di=%u\n", + seq_printf(m, "Netfs : DL=%u ds=%u df=%u di=%u\n", atomic_read(&netfs_n_rh_download), atomic_read(&netfs_n_rh_download_done), atomic_read(&netfs_n_rh_download_failed), atomic_read(&netfs_n_rh_download_instead)); - seq_printf(m, "RdHelp : RD=%u rs=%u rf=%u\n", + seq_printf(m, "Netfs : RD=%u rs=%u rf=%u\n", atomic_read(&netfs_n_rh_read), atomic_read(&netfs_n_rh_read_done), atomic_read(&netfs_n_rh_read_failed)); - seq_printf(m, "RdHelp : WR=%u ws=%u wf=%u\n", - atomic_read(&netfs_n_rh_write), - atomic_read(&netfs_n_rh_write_done), - atomic_read(&netfs_n_rh_write_failed)); + seq_printf(m, "Netfs : UL=%u us=%u uf=%u\n", + atomic_read(&netfs_n_wh_upload), + atomic_read(&netfs_n_wh_upload_done), + atomic_read(&netfs_n_wh_upload_failed)); + seq_printf(m, "Netfs : WR=%u ws=%u wf=%u\n", + atomic_read(&netfs_n_wh_write), + atomic_read(&netfs_n_wh_write_done), + atomic_read(&netfs_n_wh_write_failed)); + seq_printf(m, "Netfs : rr=%u sr=%u wsc=%u\n", + atomic_read(&netfs_n_rh_rreq), + atomic_read(&netfs_n_rh_sreq), + atomic_read(&netfs_n_wh_wstream_conflict)); + return fscache_stats_show(m); } EXPORT_SYMBOL(netfs_stats_show); diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 01ac733a63203a459a994a0ec9df8d6006fcb875..f7e32d76e34d74b76aba8f6d31bcdb95a310f2f1 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -169,8 +169,8 @@ config ROOT_NFS config NFS_FSCACHE bool "Provide NFS client caching support" - depends on NFS_FS=m && FSCACHE || NFS_FS=y && FSCACHE=y - select NETFS_SUPPORT + depends on NFS_FS=m && NETFS_SUPPORT || NFS_FS=y && NETFS_SUPPORT=y + select FSCACHE help Say Y here if you want NFS data to be cached locally on disc through the general filesystem cache manager diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index 
b05717fe0d4e4f5b505f98e52e25273fe216b325..2d1bfee225c3693d4443c62463944ecf04439bca 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -274,12 +274,6 @@ static void nfs_netfs_free_request(struct netfs_io_request *rreq) put_nfs_open_context(rreq->netfs_priv); } -static inline int nfs_netfs_begin_cache_operation(struct netfs_io_request *rreq) -{ - return fscache_begin_read_operation(&rreq->cache_resources, - netfs_i_cookie(netfs_inode(rreq->inode))); -} - static struct nfs_netfs_io_data *nfs_netfs_alloc(struct netfs_io_subrequest *sreq) { struct nfs_netfs_io_data *netfs; @@ -387,7 +381,6 @@ void nfs_netfs_read_completion(struct nfs_pgio_header *hdr) const struct netfs_request_ops nfs_netfs_ops = { .init_request = nfs_netfs_init_request, .free_request = nfs_netfs_free_request, - .begin_cache_operation = nfs_netfs_begin_cache_operation, .issue_read = nfs_netfs_issue_read, .clamp_length = nfs_netfs_clamp_length }; diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h index 5407ab8c8783574da8c83e0e752f8d3640bfb8d7..e3cb4923316b2cc044fd87ef6399c867ba19663c 100644 --- a/fs/nfs/fscache.h +++ b/fs/nfs/fscache.h @@ -80,7 +80,7 @@ static inline void nfs_netfs_put(struct nfs_netfs_io_data *netfs) } static inline void nfs_netfs_inode_init(struct nfs_inode *nfsi) { - netfs_inode_init(&nfsi->netfs, &nfs_netfs_ops); + netfs_inode_init(&nfsi->netfs, &nfs_netfs_ops, false); } extern void nfs_netfs_initiate_read(struct nfs_pgio_header *hdr); extern void nfs_netfs_read_completion(struct nfs_pgio_header *hdr); diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index d64a306a414be0580e910842b19f150bf43863a9..971892620504730e6e2265f50c54874f3d676eac 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -151,7 +151,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, return -EOPNOTSUPP; ses = tcon->ses; - server = ses->server; + server = cifs_pick_channel(ses); cfids = tcon->cfids; if (!server->ops->new_lease_key) diff --git 
a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index 60027f5aebe87f2050584994ee68699ae7ed6e5b..3e4209f41c18f854a190c523b67e3c105689ca2f 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -659,6 +659,7 @@ static ssize_t cifs_stats_proc_write(struct file *file, spin_lock(&tcon->stat_lock); tcon->bytes_read = 0; tcon->bytes_written = 0; + tcon->stats_from_time = ktime_get_real_seconds(); spin_unlock(&tcon->stat_lock); if (server->ops->clear_stats) server->ops->clear_stats(tcon); @@ -737,8 +738,9 @@ static int cifs_stats_proc_show(struct seq_file *m, void *v) seq_printf(m, "\n%d) %s", i, tcon->tree_name); if (tcon->need_reconnect) seq_puts(m, "\tDISCONNECTED "); - seq_printf(m, "\nSMBs: %d", - atomic_read(&tcon->num_smbs_sent)); + seq_printf(m, "\nSMBs: %d since %ptTs UTC", + atomic_read(&tcon->num_smbs_sent), + &tcon->stats_from_time); if (server->ops->print_stats) server->ops->print_stats(m, tcon); } diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index 99b0ade833aa3c5469405da758709a8cc36a18f9..e902de4e475af9cc3483fba922a1b11cbb068cd9 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -430,7 +430,7 @@ static void cifs_evict_inode(struct inode *inode) { truncate_inode_pages_final(&inode->i_data); - if (inode->i_state & I_PINNING_FSCACHE_WB) + if (inode->i_state & I_PINNING_NETFS_WB) cifs_fscache_unuse_inode_cookie(inode, true); cifs_fscache_release_inode_cookie(inode); clear_inode(inode); @@ -681,6 +681,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root) seq_printf(s, ",rasize=%u", cifs_sb->ctx->rasize); if (tcon->ses->server->min_offload) seq_printf(s, ",esize=%u", tcon->ses->server->min_offload); + if (tcon->ses->server->retrans) + seq_printf(s, ",retrans=%u", tcon->ses->server->retrans); seq_printf(s, ",echo_interval=%lu", tcon->ses->server->echo_interval / HZ); @@ -793,8 +795,7 @@ static int cifs_show_stats(struct seq_file *s, struct dentry *root) static int cifs_write_inode(struct inode 
*inode, struct writeback_control *wbc) { - fscache_unpin_writeback(wbc, cifs_inode_cookie(inode)); - return 0; + return netfs_unpin_writeback(inode, wbc); } static int cifs_drop_inode(struct inode *inode) @@ -1222,7 +1223,7 @@ static int cifs_precopy_set_eof(struct inode *src_inode, struct cifsInodeInfo *s if (rc < 0) goto set_failed; - netfs_resize_file(&src_cifsi->netfs, src_end); + netfs_resize_file(&src_cifsi->netfs, src_end, true); fscache_resize_cookie(cifs_inode_cookie(src_inode), src_end); return 0; @@ -1353,7 +1354,7 @@ static loff_t cifs_remap_file_range(struct file *src_file, loff_t off, smb_file_src, smb_file_target, off, len, destoff); if (rc == 0 && new_size > i_size_read(target_inode)) { truncate_setsize(target_inode, new_size); - netfs_resize_file(&target_cifsi->netfs, new_size); + netfs_resize_file(&target_cifsi->netfs, new_size, true); fscache_resize_cookie(cifs_inode_cookie(target_inode), new_size); } diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 879d5ef8a66eda8bd3c0aeb8dcea6556ce7acef8..20036fb16cececeaa3acffb78d81691ac86b1ec3 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -204,6 +204,8 @@ struct cifs_open_info_data { }; } reparse; char *symlink_target; + struct cifs_sid posix_owner; + struct cifs_sid posix_group; union { struct smb2_file_all_info fi; struct smb311_posix_qinfo posix_fi; @@ -751,6 +753,7 @@ struct TCP_Server_Info { unsigned int max_read; unsigned int max_write; unsigned int min_offload; + unsigned int retrans; __le16 compress_algorithm; __u16 signing_algorithm; __le16 cipher_type; @@ -1207,6 +1210,7 @@ struct cifs_tcon { __u64 bytes_read; __u64 bytes_written; spinlock_t stat_lock; /* protects the two fields above */ + time64_t stats_from_time; FILE_SYSTEM_DEVICE_INFO fsDevInfo; FILE_SYSTEM_ATTRIBUTE_INFO fsAttrInfo; /* ok if fs name truncated */ FILE_SYSTEM_UNIX_INFO fsUnixInfo; diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 
3052a208c6ca05aa52c7e297c1c2f6eed0af7b40..bfd568f8971056b2c9ffbd509e026f140549f1af 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -1574,6 +1574,9 @@ static int match_server(struct TCP_Server_Info *server, if (server->min_offload != ctx->min_offload) return 0; + if (server->retrans != ctx->retrans) + return 0; + return 1; } @@ -1798,6 +1801,7 @@ smbd_connected: goto out_err_crypto_release; } tcp_ses->min_offload = ctx->min_offload; + tcp_ses->retrans = ctx->retrans; /* * at this point we are the only ones with the pointer * to the struct since the kernel thread not created yet diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 1b4262aff8fab0d66d885dcae7134fb87ba19f85..3a213432775b167dfc844df81de1aba42c5fcfb9 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -5043,27 +5043,13 @@ static void cifs_swap_deactivate(struct file *file) /* do we need to unpin (or unlock) the file */ } -/* - * Mark a page as having been made dirty and thus needing writeback. We also - * need to pin the cache object to write back to. 
- */ -#ifdef CONFIG_CIFS_FSCACHE -static bool cifs_dirty_folio(struct address_space *mapping, struct folio *folio) -{ - return fscache_dirty_folio(mapping, folio, - cifs_inode_cookie(mapping->host)); -} -#else -#define cifs_dirty_folio filemap_dirty_folio -#endif - const struct address_space_operations cifs_addr_ops = { .read_folio = cifs_read_folio, .readahead = cifs_readahead, .writepages = cifs_writepages, .write_begin = cifs_write_begin, .write_end = cifs_write_end, - .dirty_folio = cifs_dirty_folio, + .dirty_folio = netfs_dirty_folio, .release_folio = cifs_release_folio, .direct_IO = cifs_direct_io, .invalidate_folio = cifs_invalidate_folio, @@ -5087,7 +5073,7 @@ const struct address_space_operations cifs_addr_ops_smallbuf = { .writepages = cifs_writepages, .write_begin = cifs_write_begin, .write_end = cifs_write_end, - .dirty_folio = cifs_dirty_folio, + .dirty_folio = netfs_dirty_folio, .release_folio = cifs_release_folio, .invalidate_folio = cifs_invalidate_folio, .launder_folio = cifs_launder_folio, diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index a3493da12ad1e6cbac7249f3e8464cf7eeff542e..52cbef2eeb28f6ba0013063b4bafcecc08c3a02d 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -139,6 +139,7 @@ const struct fs_parameter_spec smb3_fs_parameters[] = { fsparam_u32("dir_mode", Opt_dirmode), fsparam_u32("port", Opt_port), fsparam_u32("min_enc_offload", Opt_min_enc_offload), + fsparam_u32("retrans", Opt_retrans), fsparam_u32("esize", Opt_min_enc_offload), fsparam_u32("bsize", Opt_blocksize), fsparam_u32("rasize", Opt_rasize), @@ -1064,6 +1065,9 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, case Opt_min_enc_offload: ctx->min_offload = result.uint_32; break; + case Opt_retrans: + ctx->retrans = result.uint_32; + break; case Opt_blocksize: /* * inode blocksize realistically should never need to be @@ -1619,6 +1623,8 @@ int smb3_init_fs_context(struct fs_context *fc) ctx->backupuid_specified = 
false; /* no backup intent for a user */ ctx->backupgid_specified = false; /* no backup intent for a group */ + ctx->retrans = 1; + /* * short int override_uid = -1; * short int override_gid = -1; diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h index cf46916286d029a9bd36ea4980786456bd11449f..182ce11cbe9362eccf73eebadcdcfc2ee7ac7988 100644 --- a/fs/smb/client/fs_context.h +++ b/fs/smb/client/fs_context.h @@ -118,6 +118,7 @@ enum cifs_param { Opt_file_mode, Opt_dirmode, Opt_min_enc_offload, + Opt_retrans, Opt_blocksize, Opt_rasize, Opt_rsize, @@ -245,6 +246,7 @@ struct smb3_fs_context { unsigned int rsize; unsigned int wsize; unsigned int min_offload; + unsigned int retrans; bool sockopt_tcp_nodelay:1; /* attribute cache timemout for files and directories in jiffies */ unsigned long acregmax; diff --git a/fs/smb/client/fscache.c b/fs/smb/client/fscache.c index e5cad149f5a2d7d3f12d53ef61f78332a828c367..c4a3cb736881ae73fe2e002fcb2f5cadbe6cd731 100644 --- a/fs/smb/client/fscache.c +++ b/fs/smb/client/fscache.c @@ -180,7 +180,7 @@ static int fscache_fallback_write_pages(struct inode *inode, loff_t start, size_ if (ret < 0) return ret; - ret = cres.ops->prepare_write(&cres, &start, &len, i_size_read(inode), + ret = cres.ops->prepare_write(&cres, &start, &len, len, i_size_read(inode), no_space_allocated_yet); if (ret == 0) ret = fscache_write(&cres, start, &iter, NULL, NULL); diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index 9f37c1758f732cb310a0dc958b288b225e851412..f0989484f2c648796d923fcd3f998b150b1f92cf 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -665,8 +665,6 @@ static int cifs_sfu_mode(struct cifs_fattr *fattr, const unsigned char *path, /* Fill a cifs_fattr struct with info from POSIX info struct */ static void smb311_posix_info_to_fattr(struct cifs_fattr *fattr, struct cifs_open_info_data *data, - struct cifs_sid *owner, - struct cifs_sid *group, struct super_block *sb) { struct smb311_posix_qinfo *info = 
&data->posix_fi; @@ -722,8 +720,8 @@ out_reparse: fattr->cf_symlink_target = data->symlink_target; data->symlink_target = NULL; } - sid_to_id(cifs_sb, owner, fattr, SIDOWNER); - sid_to_id(cifs_sb, group, fattr, SIDGROUP); + sid_to_id(cifs_sb, &data->posix_owner, fattr, SIDOWNER); + sid_to_id(cifs_sb, &data->posix_group, fattr, SIDGROUP); cifs_dbg(FYI, "POSIX query info: mode 0x%x uniqueid 0x%llx nlink %d\n", fattr->cf_mode, fattr->cf_uniqueid, fattr->cf_nlink); @@ -1070,9 +1068,7 @@ static int reparse_info_to_fattr(struct cifs_open_info_data *data, const unsigned int xid, struct cifs_tcon *tcon, const char *full_path, - struct cifs_fattr *fattr, - struct cifs_sid *owner, - struct cifs_sid *group) + struct cifs_fattr *fattr) { struct TCP_Server_Info *server = tcon->ses->server; struct cifs_sb_info *cifs_sb = CIFS_SB(sb); @@ -1117,7 +1113,7 @@ static int reparse_info_to_fattr(struct cifs_open_info_data *data, } if (tcon->posix_extensions) - smb311_posix_info_to_fattr(fattr, data, owner, group, sb); + smb311_posix_info_to_fattr(fattr, data, sb); else cifs_open_info_to_fattr(fattr, data, sb); out: @@ -1171,8 +1167,7 @@ static int cifs_get_fattr(struct cifs_open_info_data *data, */ if (cifs_open_data_reparse(data)) { rc = reparse_info_to_fattr(data, sb, xid, tcon, - full_path, fattr, - NULL, NULL); + full_path, fattr); } else { cifs_open_info_to_fattr(fattr, data, sb); } @@ -1317,10 +1312,10 @@ static int smb311_posix_get_fattr(struct cifs_open_info_data *data, const unsigned int xid) { struct cifs_open_info_data tmp_data = {}; + struct TCP_Server_Info *server; struct cifs_sb_info *cifs_sb = CIFS_SB(sb); struct cifs_tcon *tcon; struct tcon_link *tlink; - struct cifs_sid owner, group; int tmprc; int rc = 0; @@ -1328,14 +1323,14 @@ static int smb311_posix_get_fattr(struct cifs_open_info_data *data, if (IS_ERR(tlink)) return PTR_ERR(tlink); tcon = tlink_tcon(tlink); + server = tcon->ses->server; /* * 1. 
Fetch file metadata if not provided (data) */ if (!data) { - rc = smb311_posix_query_path_info(xid, tcon, cifs_sb, - full_path, &tmp_data, - &owner, &group); + rc = server->ops->query_path_info(xid, tcon, cifs_sb, + full_path, &tmp_data); data = &tmp_data; } @@ -1347,11 +1342,9 @@ static int smb311_posix_get_fattr(struct cifs_open_info_data *data, case 0: if (cifs_open_data_reparse(data)) { rc = reparse_info_to_fattr(data, sb, xid, tcon, - full_path, fattr, - &owner, &group); + full_path, fattr); } else { - smb311_posix_info_to_fattr(fattr, data, - &owner, &group, sb); + smb311_posix_info_to_fattr(fattr, data, sb); } break; case -EREMOTE: diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c index c2137ea3c2538937665056619d3ad17a0089eb29..0748d7b757b95a88abcab10418d5f4d8dc78642d 100644 --- a/fs/smb/client/misc.c +++ b/fs/smb/client/misc.c @@ -140,6 +140,7 @@ tcon_info_alloc(bool dir_leases_enabled) spin_lock_init(&ret_buf->stat_lock); atomic_set(&ret_buf->num_local_opens, 0); atomic_set(&ret_buf->num_remote_opens, 0); + ret_buf->stats_from_time = ktime_get_real_seconds(); #ifdef CONFIG_CIFS_DFS_UPCALL INIT_LIST_HEAD(&ret_buf->dfs_ses_list); #endif diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c index 056cae1ddccef274010b09e64ddbe5231a485f40..94255401b38dcb24c705f255731db2791e171c8d 100644 --- a/fs/smb/client/readdir.c +++ b/fs/smb/client/readdir.c @@ -133,14 +133,14 @@ retry: * Query dir responses don't provide enough * information about reparse points other than * their reparse tags. Save an invalidation by - * not clobbering the existing mode, size and - * symlink target (if any) when reparse tag and - * ctime haven't changed. + * not clobbering some existing attributes when + * reparse tag and ctime haven't changed. 
*/ rc = 0; if (fattr->cf_cifsattrs & ATTR_REPARSE) { if (likely(reparse_inode_match(inode, fattr))) { fattr->cf_mode = inode->i_mode; + fattr->cf_rdev = inode->i_rdev; fattr->cf_eof = CIFS_I(inode)->server_eof; fattr->cf_symlink_target = NULL; } else { @@ -645,10 +645,10 @@ static int cifs_entry_is_dot(struct cifs_dirent *de, bool is_unicode) static int is_dir_changed(struct file *file) { struct inode *inode = file_inode(file); - struct cifsInodeInfo *cifsInfo = CIFS_I(inode); + struct cifsInodeInfo *cifs_inode_info = CIFS_I(inode); - if (cifsInfo->time == 0) - return 1; /* directory was changed, perhaps due to unlink */ + if (cifs_inode_info->time == 0) + return 1; /* directory was changed, e.g. unlink or new file */ else return 0; diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index 5053a5550abeda064234e82c037fbbd89df6f746..a652200540c8aa5d2aa0ecd68ed50cc66f587d05 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -56,6 +56,35 @@ static inline __u32 file_create_options(struct dentry *dentry) return 0; } +/* Parse owner and group from SMB3.1.1 POSIX query info */ +static int parse_posix_sids(struct cifs_open_info_data *data, + struct kvec *rsp_iov) +{ + struct smb2_query_info_rsp *qi = rsp_iov->iov_base; + unsigned int out_len = le32_to_cpu(qi->OutputBufferLength); + unsigned int qi_len = sizeof(data->posix_fi); + int owner_len, group_len; + u8 *sidsbuf, *sidsbuf_end; + + if (out_len <= qi_len) + return -EINVAL; + + sidsbuf = (u8 *)qi + le16_to_cpu(qi->OutputBufferOffset) + qi_len; + sidsbuf_end = sidsbuf + out_len - qi_len; + + owner_len = posix_info_sid_size(sidsbuf, sidsbuf_end); + if (owner_len == -1) + return -EINVAL; + + memcpy(&data->posix_owner, sidsbuf, owner_len); + group_len = posix_info_sid_size(sidsbuf + owner_len, sidsbuf_end); + if (group_len == -1) + return -EINVAL; + + memcpy(&data->posix_group, sidsbuf + owner_len, group_len); + return 0; +} + /* * note: If cfile is passed, the reference to it is dropped 
here. * So make sure that you do not reuse cfile after return from this func. @@ -69,7 +98,6 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, __u32 desired_access, __u32 create_disposition, __u32 create_options, umode_t mode, struct kvec *in_iov, int *cmds, int num_cmds, struct cifsFileInfo *cfile, - __u8 **extbuf, size_t *extbuflen, struct kvec *out_iov, int *out_buftype) { @@ -494,21 +522,9 @@ finished: &rsp_iov[i + 1], sizeof(idata->posix_fi) /* add SIDs */, (char *)&idata->posix_fi); } - if (rc == 0) { - unsigned int length = le32_to_cpu(qi_rsp->OutputBufferLength); - - if (length > sizeof(idata->posix_fi)) { - char *base = (char *)rsp_iov[i + 1].iov_base + - le16_to_cpu(qi_rsp->OutputBufferOffset) + - sizeof(idata->posix_fi); - *extbuflen = length - sizeof(idata->posix_fi); - *extbuf = kmemdup(base, *extbuflen, GFP_KERNEL); - if (!*extbuf) - rc = -ENOMEM; - } else { - rc = -EINVAL; - } - } + if (rc == 0) + rc = parse_posix_sids(idata, &rsp_iov[i + 1]); + SMB2_query_info_free(&rqst[num_rqst++]); if (rc) trace_smb3_posix_query_info_compound_err(xid, ses->Suid, @@ -662,7 +678,7 @@ int smb2_query_path_info(const unsigned int xid, struct smb2_hdr *hdr; struct kvec in_iov[2], out_iov[3] = {}; int out_buftype[3] = {}; - int cmds[2] = { SMB2_OP_QUERY_INFO, }; + int cmds[2]; bool islink; int i, num_cmds; int rc, rc2; @@ -670,20 +686,36 @@ int smb2_query_path_info(const unsigned int xid, data->adjust_tz = false; data->reparse_point = false; - if (strcmp(full_path, "")) - rc = -ENOENT; - else - rc = open_cached_dir(xid, tcon, full_path, cifs_sb, false, &cfid); - /* If it is a root and its handle is cached then use it */ - if (!rc) { - if (cfid->file_all_info_is_valid) { - memcpy(&data->fi, &cfid->file_all_info, sizeof(data->fi)); + /* + * BB TODO: Add support for using cached root handle in SMB3.1.1 POSIX. + * Create SMB2_query_posix_info worker function to do non-compounded + * query when we already have an open file handle for this. 
For now this + * is fast enough (always using the compounded version). + */ + if (!tcon->posix_extensions) { + if (*full_path) { + rc = -ENOENT; } else { - rc = SMB2_query_info(xid, tcon, cfid->fid.persistent_fid, - cfid->fid.volatile_fid, &data->fi); + rc = open_cached_dir(xid, tcon, full_path, + cifs_sb, false, &cfid); + } + /* If it is a root and its handle is cached then use it */ + if (!rc) { + if (cfid->file_all_info_is_valid) { + memcpy(&data->fi, &cfid->file_all_info, + sizeof(data->fi)); + } else { + rc = SMB2_query_info(xid, tcon, + cfid->fid.persistent_fid, + cfid->fid.volatile_fid, + &data->fi); + } + close_cached_dir(cfid); + return rc; } - close_cached_dir(cfid); - return rc; + cmds[0] = SMB2_OP_QUERY_INFO; + } else { + cmds[0] = SMB2_OP_POSIX_QUERY_INFO; } in_iov[0].iov_base = data; @@ -693,9 +725,8 @@ int smb2_query_path_info(const unsigned int xid, cifs_get_readable_path(tcon, full_path, &cfile); rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, FILE_READ_ATTRIBUTES, FILE_OPEN, - create_options, ACL_NO_MODE, - in_iov, cmds, 1, cfile, - NULL, NULL, out_iov, out_buftype); + create_options, ACL_NO_MODE, in_iov, + cmds, 1, cfile, out_iov, out_buftype); hdr = out_iov[0].iov_base; /* * If first iov is unset, then SMB session was dropped or we've got a @@ -707,6 +738,10 @@ int smb2_query_path_info(const unsigned int xid, switch (rc) { case 0: case -EOPNOTSUPP: + /* + * BB TODO: When support for special files added to Samba + * re-verify this path. 
+ */ rc = parse_create_response(data, cifs_sb, &out_iov[0]); if (rc || !data->reparse_point) goto out; @@ -722,8 +757,8 @@ int smb2_query_path_info(const unsigned int xid, cifs_get_readable_path(tcon, full_path, &cfile); rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, FILE_READ_ATTRIBUTES, FILE_OPEN, - create_options, ACL_NO_MODE, in_iov, cmds, - num_cmds, cfile, NULL, NULL, NULL, NULL); + create_options, ACL_NO_MODE, in_iov, + cmds, num_cmds, cfile, NULL, NULL); break; case -EREMOTE: break; @@ -746,101 +781,6 @@ out: return rc; } -int smb311_posix_query_path_info(const unsigned int xid, - struct cifs_tcon *tcon, - struct cifs_sb_info *cifs_sb, - const char *full_path, - struct cifs_open_info_data *data, - struct cifs_sid *owner, - struct cifs_sid *group) -{ - int rc; - __u32 create_options = 0; - struct cifsFileInfo *cfile; - struct kvec in_iov[2], out_iov[3] = {}; - int out_buftype[3] = {}; - __u8 *sidsbuf = NULL; - __u8 *sidsbuf_end = NULL; - size_t sidsbuflen = 0; - size_t owner_len, group_len; - int cmds[2] = { SMB2_OP_POSIX_QUERY_INFO, }; - int i, num_cmds; - - data->adjust_tz = false; - data->reparse_point = false; - - /* - * BB TODO: Add support for using the cached root handle. - * Create SMB2_query_posix_info worker function to do non-compounded query - * when we already have an open file handle for this. For now this is fast enough - * (always using the compounded version). - */ - in_iov[0].iov_base = data; - in_iov[0].iov_len = sizeof(*data); - in_iov[1] = in_iov[0]; - - cifs_get_readable_path(tcon, full_path, &cfile); - rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, - FILE_READ_ATTRIBUTES, FILE_OPEN, - create_options, ACL_NO_MODE, in_iov, cmds, 1, - cfile, &sidsbuf, &sidsbuflen, out_iov, out_buftype); - /* - * If first iov is unset, then SMB session was dropped or we've got a - * cached open file (@cfile). 
- */ - if (!out_iov[0].iov_base || out_buftype[0] == CIFS_NO_BUFFER) - goto out; - - switch (rc) { - case 0: - case -EOPNOTSUPP: - /* BB TODO: When support for special files added to Samba re-verify this path */ - rc = parse_create_response(data, cifs_sb, &out_iov[0]); - if (rc || !data->reparse_point) - goto out; - - if (data->reparse.tag == IO_REPARSE_TAG_SYMLINK) { - /* symlink already parsed in create response */ - num_cmds = 1; - } else { - cmds[1] = SMB2_OP_GET_REPARSE; - num_cmds = 2; - } - create_options |= OPEN_REPARSE_POINT; - cifs_get_readable_path(tcon, full_path, &cfile); - rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, - FILE_READ_ATTRIBUTES, FILE_OPEN, - create_options, ACL_NO_MODE, in_iov, cmds, - num_cmds, cfile, &sidsbuf, &sidsbuflen, NULL, NULL); - break; - } - -out: - if (rc == 0) { - sidsbuf_end = sidsbuf + sidsbuflen; - - owner_len = posix_info_sid_size(sidsbuf, sidsbuf_end); - if (owner_len == -1) { - rc = -EINVAL; - goto out; - } - memcpy(owner, sidsbuf, owner_len); - - group_len = posix_info_sid_size( - sidsbuf + owner_len, sidsbuf_end); - if (group_len == -1) { - rc = -EINVAL; - goto out; - } - memcpy(group, sidsbuf + owner_len, group_len); - } - - kfree(sidsbuf); - for (i = 0; i < ARRAY_SIZE(out_buftype); i++) - free_rsp_buf(out_buftype[i], out_iov[i].iov_base); - return rc; -} - int smb2_mkdir(const unsigned int xid, struct inode *parent_inode, umode_t mode, struct cifs_tcon *tcon, const char *name, @@ -848,9 +788,9 @@ smb2_mkdir(const unsigned int xid, struct inode *parent_inode, umode_t mode, { return smb2_compound_op(xid, tcon, cifs_sb, name, FILE_WRITE_ATTRIBUTES, FILE_CREATE, - CREATE_NOT_FILE, mode, NULL, - &(int){SMB2_OP_MKDIR}, 1, - NULL, NULL, NULL, NULL, NULL); + CREATE_NOT_FILE, mode, + NULL, &(int){SMB2_OP_MKDIR}, 1, + NULL, NULL, NULL); } void @@ -875,7 +815,7 @@ smb2_mkdir_setinfo(struct inode *inode, const char *name, FILE_WRITE_ATTRIBUTES, FILE_CREATE, CREATE_NOT_FILE, ACL_NO_MODE, &in_iov, &(int){SMB2_OP_SET_INFO}, 
1, - cfile, NULL, NULL, NULL, NULL); + cfile, NULL, NULL); if (tmprc == 0) cifs_i->cifsAttrs = dosattrs; } @@ -887,8 +827,9 @@ smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, drop_cached_dir_by_name(xid, tcon, name, cifs_sb); return smb2_compound_op(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN, CREATE_NOT_FILE, - ACL_NO_MODE, NULL, &(int){SMB2_OP_RMDIR}, 1, - NULL, NULL, NULL, NULL, NULL); + ACL_NO_MODE, NULL, + &(int){SMB2_OP_RMDIR}, 1, + NULL, NULL, NULL); } int @@ -897,8 +838,9 @@ smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon, const char *name, { return smb2_compound_op(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN, CREATE_DELETE_ON_CLOSE | OPEN_REPARSE_POINT, - ACL_NO_MODE, NULL, &(int){SMB2_OP_DELETE}, 1, - NULL, NULL, NULL, NULL, NULL); + ACL_NO_MODE, NULL, + &(int){SMB2_OP_DELETE}, 1, + NULL, NULL, NULL); } static int smb2_set_path_attr(const unsigned int xid, struct cifs_tcon *tcon, @@ -919,8 +861,8 @@ static int smb2_set_path_attr(const unsigned int xid, struct cifs_tcon *tcon, in_iov.iov_base = smb2_to_name; in_iov.iov_len = 2 * UniStrnlen((wchar_t *)smb2_to_name, PATH_MAX); rc = smb2_compound_op(xid, tcon, cifs_sb, from_name, access, - FILE_OPEN, create_options, ACL_NO_MODE, &in_iov, - &command, 1, cfile, NULL, NULL, NULL, NULL); + FILE_OPEN, create_options, ACL_NO_MODE, + &in_iov, &command, 1, cfile, NULL, NULL); smb2_rename_path: kfree(smb2_to_name); return rc; @@ -971,7 +913,7 @@ smb2_set_path_size(const unsigned int xid, struct cifs_tcon *tcon, FILE_WRITE_DATA, FILE_OPEN, 0, ACL_NO_MODE, &in_iov, &(int){SMB2_OP_SET_EOF}, 1, - cfile, NULL, NULL, NULL, NULL); + cfile, NULL, NULL); } int @@ -999,8 +941,8 @@ smb2_set_file_info(struct inode *inode, const char *full_path, rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, FILE_WRITE_ATTRIBUTES, FILE_OPEN, 0, ACL_NO_MODE, &in_iov, - &(int){SMB2_OP_SET_INFO}, 1, cfile, - NULL, NULL, NULL, NULL); + &(int){SMB2_OP_SET_INFO}, 1, + cfile, NULL, NULL); 
cifs_put_tlink(tlink); return rc; } @@ -1035,7 +977,7 @@ struct inode *smb2_get_reparse_inode(struct cifs_open_info_data *data, cifs_get_writable_path(tcon, full_path, FIND_WR_ANY, &cfile); rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, da, cd, co, ACL_NO_MODE, in_iov, - cmds, 2, cfile, NULL, NULL, NULL, NULL); + cmds, 2, cfile, NULL, NULL); if (!rc) { rc = smb311_posix_get_inode_info(&new, full_path, data, sb, xid); @@ -1045,7 +987,7 @@ struct inode *smb2_get_reparse_inode(struct cifs_open_info_data *data, cifs_get_writable_path(tcon, full_path, FIND_WR_ANY, &cfile); rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, da, cd, co, ACL_NO_MODE, in_iov, - cmds, 2, cfile, NULL, NULL, NULL, NULL); + cmds, 2, cfile, NULL, NULL); if (!rc) { rc = cifs_get_inode_info(&new, full_path, data, sb, xid, NULL); @@ -1072,8 +1014,8 @@ int smb2_query_reparse_point(const unsigned int xid, rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, FILE_READ_ATTRIBUTES, FILE_OPEN, OPEN_REPARSE_POINT, ACL_NO_MODE, &in_iov, - &(int){SMB2_OP_GET_REPARSE}, 1, cfile, - NULL, NULL, NULL, NULL); + &(int){SMB2_OP_GET_REPARSE}, 1, + cfile, NULL, NULL); if (rc) goto out; diff --git a/fs/smb/client/smb2maperror.c b/fs/smb/client/smb2maperror.c index 1a90dd78b238f0de191421bd0d1838164bff9778..ac1895358908abff42e51059644aed30be670373 100644 --- a/fs/smb/client/smb2maperror.c +++ b/fs/smb/client/smb2maperror.c @@ -1210,6 +1210,8 @@ static const struct status_to_posix_error smb2_error_map_table[] = { {STATUS_INVALID_TASK_INDEX, -EIO, "STATUS_INVALID_TASK_INDEX"}, {STATUS_THREAD_ALREADY_IN_TASK, -EIO, "STATUS_THREAD_ALREADY_IN_TASK"}, {STATUS_CALLBACK_BYPASS, -EIO, "STATUS_CALLBACK_BYPASS"}, + {STATUS_SERVER_UNAVAILABLE, -EAGAIN, "STATUS_SERVER_UNAVAILABLE"}, + {STATUS_FILE_NOT_AVAILABLE, -EAGAIN, "STATUS_FILE_NOT_AVAILABLE"}, {STATUS_PORT_CLOSED, -EIO, "STATUS_PORT_CLOSED"}, {STATUS_MESSAGE_LOST, -EIO, "STATUS_MESSAGE_LOST"}, {STATUS_INVALID_MESSAGE, -EIO, "STATUS_INVALID_MESSAGE"}, diff --git 
a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 01a5bd7e6a307f1d20619001e2a1e5da7f8e2e87..d9553c2556a290dcea14434e00df9d854e713aa3 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -614,7 +614,8 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, "multichannel not available\n" "Empty network interface list returned by server %s\n", ses->server->hostname); - rc = -EINVAL; + rc = -EOPNOTSUPP; + ses->iface_last_update = jiffies; goto out; } @@ -712,7 +713,6 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, ses->iface_count++; spin_unlock(&ses->iface_lock); - ses->iface_last_update = jiffies; next_iface: nb_iface++; next = le32_to_cpu(p->Next); @@ -734,11 +734,7 @@ next_iface: if ((bytes_left > 8) || p->Next) cifs_dbg(VFS, "%s: incomplete interface info\n", __func__); - - if (!ses->iface_count) { - rc = -EINVAL; - goto out; - } + ses->iface_last_update = jiffies; out: /* diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index bd25c34dc398b6460c37e3c1ea778fdc0cca6b80..288199f0b987df98ba3fab9320523bc16e73092d 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -156,6 +156,57 @@ out: return; } +/* helper function for code reuse */ +static int +cifs_chan_skip_or_disable(struct cifs_ses *ses, + struct TCP_Server_Info *server, + bool from_reconnect) +{ + struct TCP_Server_Info *pserver; + unsigned int chan_index; + + if (SERVER_IS_CHAN(server)) { + cifs_dbg(VFS, + "server %s does not support multichannel anymore. 
Skip secondary channel\n", + ses->server->hostname); + + spin_lock(&ses->chan_lock); + chan_index = cifs_ses_get_chan_index(ses, server); + if (chan_index == CIFS_INVAL_CHAN_INDEX) { + spin_unlock(&ses->chan_lock); + goto skip_terminate; + } + + ses->chans[chan_index].server = NULL; + spin_unlock(&ses->chan_lock); + + /* + * the above reference of server by channel + * needs to be dropped without holding chan_lock + * as cifs_put_tcp_session takes a higher lock + * i.e. cifs_tcp_ses_lock + */ + cifs_put_tcp_session(server, from_reconnect); + + server->terminate = true; + cifs_signal_cifsd_for_reconnect(server, false); + + /* mark primary server as needing reconnect */ + pserver = server->primary_server; + cifs_signal_cifsd_for_reconnect(pserver, false); +skip_terminate: + mutex_unlock(&ses->session_mutex); + return -EHOSTDOWN; + } + + cifs_server_dbg(VFS, + "server does not support multichannel anymore. Disable all other channels\n"); + cifs_disable_secondary_channels(ses); + + + return 0; +} + static int smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, struct TCP_Server_Info *server, bool from_reconnect) @@ -164,8 +215,6 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, struct nls_table *nls_codepage = NULL; struct cifs_ses *ses; int xid; - struct TCP_Server_Info *pserver; - unsigned int chan_index; /* * SMB2s NegProt, SessSetup, Logoff do not have tcon yet so @@ -310,44 +359,11 @@ again: */ if (ses->chan_count > 1 && !(server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) { - if (SERVER_IS_CHAN(server)) { - cifs_dbg(VFS, "server %s does not support " \ - "multichannel anymore. 
skipping secondary channel\n", - ses->server->hostname); - - spin_lock(&ses->chan_lock); - chan_index = cifs_ses_get_chan_index(ses, server); - if (chan_index == CIFS_INVAL_CHAN_INDEX) { - spin_unlock(&ses->chan_lock); - goto skip_terminate; - } - - ses->chans[chan_index].server = NULL; - spin_unlock(&ses->chan_lock); - - /* - * the above reference of server by channel - * needs to be dropped without holding chan_lock - * as cifs_put_tcp_session takes a higher lock - * i.e. cifs_tcp_ses_lock - */ - cifs_put_tcp_session(server, from_reconnect); - - server->terminate = true; - cifs_signal_cifsd_for_reconnect(server, false); - - /* mark primary server as needing reconnect */ - pserver = server->primary_server; - cifs_signal_cifsd_for_reconnect(pserver, false); - -skip_terminate: + rc = cifs_chan_skip_or_disable(ses, server, + from_reconnect); + if (rc) { mutex_unlock(&ses->session_mutex); - rc = -EHOSTDOWN; goto out; - } else { - cifs_server_dbg(VFS, "does not support " \ - "multichannel anymore. disabling all other channels\n"); - cifs_disable_secondary_channels(ses); } } @@ -395,20 +411,35 @@ skip_sess_setup: rc = SMB3_request_interfaces(xid, tcon, false); free_xid(xid); - if (rc) + if (rc == -EOPNOTSUPP) { + /* + * some servers like Azure SMB server do not advertise + * that multichannel has been disabled with server + * capabilities, rather return STATUS_NOT_IMPLEMENTED. 
+ * treat this as server not supporting multichannel + */ + + rc = cifs_chan_skip_or_disable(ses, server, + from_reconnect); + goto skip_add_channels; + } else if (rc) cifs_dbg(FYI, "%s: failed to query server interfaces: %d\n", __func__, rc); if (ses->chan_max > ses->chan_count && + ses->iface_count && !SERVER_IS_CHAN(server)) { if (ses->chan_count == 1) cifs_server_dbg(VFS, "supports multichannel now\n"); cifs_try_adding_channels(ses); + queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, + (SMB_INTERFACE_POLL_INTERVAL * HZ)); } } else { mutex_unlock(&ses->session_mutex); } +skip_add_channels: if (smb2_command != SMB2_INTERNAL_CMD) mod_delayed_work(cifsiod_wq, &server->reconnect, 0); @@ -1958,10 +1989,7 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, __le16 *unc_path = NULL; int flags = 0; unsigned int total_len; - struct TCP_Server_Info *server; - - /* always use master channel */ - server = ses->server; + struct TCP_Server_Info *server = cifs_pick_channel(ses); cifs_dbg(FYI, "TCON\n"); @@ -2094,6 +2122,7 @@ SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon) struct smb2_tree_disconnect_req *req; /* response is trivial */ int rc = 0; struct cifs_ses *ses = tcon->ses; + struct TCP_Server_Info *server = cifs_pick_channel(ses); int flags = 0; unsigned int total_len; struct kvec iov[1]; @@ -2116,7 +2145,7 @@ SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon) invalidate_all_cached_dirs(tcon); - rc = smb2_plain_req_init(SMB2_TREE_DISCONNECT, tcon, ses->server, + rc = smb2_plain_req_init(SMB2_TREE_DISCONNECT, tcon, server, (void **) &req, &total_len); if (rc) @@ -2134,7 +2163,7 @@ SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon) rqst.rq_iov = iov; rqst.rq_nvec = 1; - rc = cifs_send_recv(xid, ses, ses->server, + rc = cifs_send_recv(xid, ses, server, &rqst, &resp_buf_type, flags, &rsp_iov); cifs_small_buf_release(req); if (rc) { @@ -2279,7 +2308,7 @@ int smb2_parse_contexts(struct TCP_Server_Info *server, noff = 
le16_to_cpu(cc->NameOffset); nlen = le16_to_cpu(cc->NameLength); - if (noff + nlen >= doff) + if (noff + nlen > doff) return -EINVAL; name = (char *)cc + noff; @@ -3918,7 +3947,7 @@ void smb2_reconnect_server(struct work_struct *work) struct cifs_ses *ses, *ses2; struct cifs_tcon *tcon, *tcon2; struct list_head tmp_list, tmp_ses_list; - bool tcon_exist = false, ses_exist = false; + bool ses_exist = false; bool tcon_selected = false; int rc; bool resched = false; @@ -3964,7 +3993,7 @@ void smb2_reconnect_server(struct work_struct *work) if (tcon->need_reconnect || tcon->need_reopen_files) { tcon->tc_count++; list_add_tail(&tcon->rlist, &tmp_list); - tcon_selected = tcon_exist = true; + tcon_selected = true; } } /* @@ -3973,7 +4002,7 @@ void smb2_reconnect_server(struct work_struct *work) */ if (ses->tcon_ipc && ses->tcon_ipc->need_reconnect) { list_add_tail(&ses->tcon_ipc->rlist, &tmp_list); - tcon_selected = tcon_exist = true; + tcon_selected = true; cifs_smb_ses_inc_refcount(ses); } /* diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h index 343ada691e763bfce3eb64f1c7871922f7eb7a76..0034b537b0b3f9dd057ce5183f05b6fedb147f77 100644 --- a/fs/smb/client/smb2proto.h +++ b/fs/smb/client/smb2proto.h @@ -299,9 +299,7 @@ int smb311_posix_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, - struct cifs_open_info_data *data, - struct cifs_sid *owner, - struct cifs_sid *group); + struct cifs_open_info_data *data); int posix_info_parse(const void *beg, const void *end, struct smb2_posix_info_parsed *out); int posix_info_sid_size(const void *beg, const void *end); diff --git a/fs/smb/client/smb2status.h b/fs/smb/client/smb2status.h index a9e958166fc53a3c4b5d7a23efb8d326deec3543..9c6d79b0bd4978cea9e33bcfd17432219a9a5232 100644 --- a/fs/smb/client/smb2status.h +++ b/fs/smb/client/smb2status.h @@ -982,6 +982,8 @@ struct ntstatus { #define STATUS_INVALID_TASK_INDEX cpu_to_le32(0xC0000501) #define 
STATUS_THREAD_ALREADY_IN_TASK cpu_to_le32(0xC0000502) #define STATUS_CALLBACK_BYPASS cpu_to_le32(0xC0000503) +#define STATUS_SERVER_UNAVAILABLE cpu_to_le32(0xC0000466) +#define STATUS_FILE_NOT_AVAILABLE cpu_to_le32(0xC0000467) #define STATUS_PORT_CLOSED cpu_to_le32(0xC0000700) #define STATUS_MESSAGE_LOST cpu_to_le32(0xC0000701) #define STATUS_INVALID_MESSAGE cpu_to_le32(0xC0000702) diff --git a/fs/smb/server/asn1.c b/fs/smb/server/asn1.c index 4a4b2b03ff33df060c4c7b16112b98e7a8a3a7c4..b931a99ab9c85e016319244070a21bbf54028ef7 100644 --- a/fs/smb/server/asn1.c +++ b/fs/smb/server/asn1.c @@ -214,10 +214,15 @@ static int ksmbd_neg_token_alloc(void *context, size_t hdrlen, { struct ksmbd_conn *conn = context; + if (!vlen) + return -EINVAL; + conn->mechToken = kmemdup_nul(value, vlen, GFP_KERNEL); if (!conn->mechToken) return -ENOMEM; + conn->mechTokenLen = (unsigned int)vlen; + return 0; } diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c index d311c2ee10bd7f82172342dd53e58f07a36d1702..09e1e7771592f522e44e309505078e04b6853cad 100644 --- a/fs/smb/server/connection.c +++ b/fs/smb/server/connection.c @@ -416,13 +416,7 @@ static void stop_sessions(void) again: down_read(&conn_list_lock); list_for_each_entry(conn, &conn_list, conns_list) { - struct task_struct *task; - t = conn->transport; - task = t->handler; - if (task) - ksmbd_debug(CONN, "Stop session handler %s/%d\n", - task->comm, task_pid_nr(task)); ksmbd_conn_set_exiting(conn); if (t->ops->shutdown) { up_read(&conn_list_lock); diff --git a/fs/smb/server/connection.h b/fs/smb/server/connection.h index 3c005246a32e8d2c38bde51b3ea8994e319c9c6b..0e04cf8b1d896ab346834b94dd912c53c86c2b0f 100644 --- a/fs/smb/server/connection.h +++ b/fs/smb/server/connection.h @@ -88,6 +88,7 @@ struct ksmbd_conn { __u16 dialect; char *mechToken; + unsigned int mechTokenLen; struct ksmbd_conn_ops *conn_ops; @@ -134,7 +135,6 @@ struct ksmbd_transport_ops { struct ksmbd_transport { struct ksmbd_conn *conn; struct 
ksmbd_transport_ops *ops; - struct task_struct *handler; }; #define KSMBD_TCP_RECV_TIMEOUT (7 * HZ) diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index 001926d3b348c88ff98bc1766a6a8b280415e39e..53dfaac425c68dc5f2192924b546b4f9fb71f6c8 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -1197,6 +1197,12 @@ int smb_grant_oplock(struct ksmbd_work *work, int req_op_level, u64 pid, bool prev_op_has_lease; __le32 prev_op_state = 0; + /* Only v2 leases handle the directory */ + if (S_ISDIR(file_inode(fp->filp)->i_mode)) { + if (!lctx || lctx->version != 2) + return 0; + } + opinfo = alloc_opinfo(work, pid, tid); if (!opinfo) return -ENOMEM; diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 3143819935dca1a90fbc5355f11786afd61aae1a..ba7a72a6a4f45f6b756768c4a3a48e19d74e3683 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -1414,7 +1414,10 @@ static struct ksmbd_user *session_user(struct ksmbd_conn *conn, char *name; unsigned int name_off, name_len, secbuf_len; - secbuf_len = le16_to_cpu(req->SecurityBufferLength); + if (conn->use_spnego && conn->mechToken) + secbuf_len = conn->mechTokenLen; + else + secbuf_len = le16_to_cpu(req->SecurityBufferLength); if (secbuf_len < sizeof(struct authenticate_message)) { ksmbd_debug(SMB, "blob len %d too small\n", secbuf_len); return NULL; @@ -1505,7 +1508,10 @@ static int ntlm_authenticate(struct ksmbd_work *work, struct authenticate_message *authblob; authblob = user_authblob(conn, req); - sz = le16_to_cpu(req->SecurityBufferLength); + if (conn->use_spnego && conn->mechToken) + sz = conn->mechTokenLen; + else + sz = le16_to_cpu(req->SecurityBufferLength); rc = ksmbd_decode_ntlmssp_auth_blob(authblob, sz, conn, sess); if (rc) { set_user_flag(sess->user, KSMBD_USER_FLAG_BAD_PASSWORD); @@ -1778,8 +1784,7 @@ int smb2_sess_setup(struct ksmbd_work *work) negblob_off = le16_to_cpu(req->SecurityBufferOffset); negblob_len = le16_to_cpu(req->SecurityBufferLength); - if 
(negblob_off < offsetof(struct smb2_sess_setup_req, Buffer) || - negblob_len < offsetof(struct negotiate_message, NegotiateFlags)) { + if (negblob_off < offsetof(struct smb2_sess_setup_req, Buffer)) { rc = -EINVAL; goto out_err; } @@ -1788,8 +1793,15 @@ int smb2_sess_setup(struct ksmbd_work *work) negblob_off); if (decode_negotiation_token(conn, negblob, negblob_len) == 0) { - if (conn->mechToken) + if (conn->mechToken) { negblob = (struct negotiate_message *)conn->mechToken; + negblob_len = conn->mechTokenLen; + } + } + + if (negblob_len < offsetof(struct negotiate_message, NegotiateFlags)) { + rc = -EINVAL; + goto out_err; } if (server_conf.auth_mechs & conn->auth_mechs) { diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index c5629a68c8b73ecf4f3cbc6fe728aac33ce434aa..8faa25c6e129b5ef7f38721ef398b942e56b0bc2 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -2039,6 +2039,7 @@ static bool rdma_frwr_is_supported(struct ib_device_attr *attrs) static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id) { struct smb_direct_transport *t; + struct task_struct *handler; int ret; if (!rdma_frwr_is_supported(&new_cm_id->device->attrs)) { @@ -2056,11 +2057,11 @@ static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id) if (ret) goto out_err; - KSMBD_TRANS(t)->handler = kthread_run(ksmbd_conn_handler_loop, - KSMBD_TRANS(t)->conn, "ksmbd:r%u", - smb_direct_port); - if (IS_ERR(KSMBD_TRANS(t)->handler)) { - ret = PTR_ERR(KSMBD_TRANS(t)->handler); + handler = kthread_run(ksmbd_conn_handler_loop, + KSMBD_TRANS(t)->conn, "ksmbd:r%u", + smb_direct_port); + if (IS_ERR(handler)) { + ret = PTR_ERR(handler); pr_err("Can't start thread\n"); goto out_err; } diff --git a/fs/smb/server/transport_tcp.c b/fs/smb/server/transport_tcp.c index eff7a1d793f00382078f3132f818a2dd4fe62cda..9d4222154dcc0c92201a0d7a6e4dac77e0eea37b 100644 --- a/fs/smb/server/transport_tcp.c +++ b/fs/smb/server/transport_tcp.c 
@@ -185,6 +185,7 @@ static int ksmbd_tcp_new_connection(struct socket *client_sk) struct sockaddr *csin; int rc = 0; struct tcp_transport *t; + struct task_struct *handler; t = alloc_transport(client_sk); if (!t) { @@ -199,13 +200,13 @@ static int ksmbd_tcp_new_connection(struct socket *client_sk) goto out_error; } - KSMBD_TRANS(t)->handler = kthread_run(ksmbd_conn_handler_loop, - KSMBD_TRANS(t)->conn, - "ksmbd:%u", - ksmbd_tcp_get_port(csin)); - if (IS_ERR(KSMBD_TRANS(t)->handler)) { + handler = kthread_run(ksmbd_conn_handler_loop, + KSMBD_TRANS(t)->conn, + "ksmbd:%u", + ksmbd_tcp_get_port(csin)); + if (IS_ERR(handler)) { pr_err("cannot start conn thread\n"); - rc = PTR_ERR(KSMBD_TRANS(t)->handler); + rc = PTR_ERR(handler); free_transport(t); } return rc; diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index f0677ea0ec24e7156ae749c1990ec43263c1097b..6b211522a13ec100a0af815798d36536b7239c4a 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -32,6 +32,18 @@ */ static DEFINE_MUTEX(eventfs_mutex); +/* Choose something "unique" ;-) */ +#define EVENTFS_FILE_INODE_INO 0x12c4e37 + +/* Just try to make something consistent and unique */ +static int eventfs_dir_ino(struct eventfs_inode *ei) +{ + if (!ei->ino) + ei->ino = get_next_ino(); + + return ei->ino; +} + /* * The eventfs_inode (ei) itself is protected by SRCU. It is released from * its parent's list and will have is_freed set (under eventfs_mutex). 
@@ -45,6 +57,7 @@ enum { EVENTFS_SAVE_MODE = BIT(16), EVENTFS_SAVE_UID = BIT(17), EVENTFS_SAVE_GID = BIT(18), + EVENTFS_TOPLEVEL = BIT(19), }; #define EVENTFS_MODE_MASK (EVENTFS_SAVE_MODE - 1) @@ -52,9 +65,7 @@ enum { static struct dentry *eventfs_root_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); -static int dcache_dir_open_wrapper(struct inode *inode, struct file *file); -static int dcache_readdir_wrapper(struct file *file, struct dir_context *ctx); -static int eventfs_release(struct inode *inode, struct file *file); +static int eventfs_iterate(struct file *file, struct dir_context *ctx); static void update_attr(struct eventfs_attr *attr, struct iattr *iattr) { @@ -94,7 +105,7 @@ static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry, /* Preallocate the children mode array if necessary */ if (!(dentry->d_inode->i_mode & S_IFDIR)) { if (!ei->entry_attrs) { - ei->entry_attrs = kzalloc(sizeof(*ei->entry_attrs) * ei->nr_entries, + ei->entry_attrs = kcalloc(ei->nr_entries, sizeof(*ei->entry_attrs), GFP_NOFS); if (!ei->entry_attrs) { ret = -ENOMEM; @@ -117,10 +128,17 @@ static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry, * The events directory dentry is never freed, unless its * part of an instance that is deleted. It's attr is the * default for its child files and directories. - * Do not update it. It's not used for its own mode or ownership + * Do not update it. It's not used for its own mode or ownership. 
*/ - if (!ei->is_events) + if (ei->is_events) { + /* But it still needs to know if it was modified */ + if (iattr->ia_valid & ATTR_UID) + ei->attr.mode |= EVENTFS_SAVE_UID; + if (iattr->ia_valid & ATTR_GID) + ei->attr.mode |= EVENTFS_SAVE_GID; + } else { update_attr(&ei->attr, iattr); + } } else { name = dentry->d_name.name; @@ -138,9 +156,66 @@ static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry, return ret; } +static void update_top_events_attr(struct eventfs_inode *ei, struct dentry *dentry) +{ + struct inode *inode; + + /* Only update if the "events" was on the top level */ + if (!ei || !(ei->attr.mode & EVENTFS_TOPLEVEL)) + return; + + /* Get the tracefs root inode. */ + inode = d_inode(dentry->d_sb->s_root); + ei->attr.uid = inode->i_uid; + ei->attr.gid = inode->i_gid; +} + +static void set_top_events_ownership(struct inode *inode) +{ + struct tracefs_inode *ti = get_tracefs(inode); + struct eventfs_inode *ei = ti->private; + struct dentry *dentry; + + /* The top events directory doesn't get automatically updated */ + if (!ei || !ei->is_events || !(ei->attr.mode & EVENTFS_TOPLEVEL)) + return; + + dentry = ei->dentry; + + update_top_events_attr(ei, dentry); + + if (!(ei->attr.mode & EVENTFS_SAVE_UID)) + inode->i_uid = ei->attr.uid; + + if (!(ei->attr.mode & EVENTFS_SAVE_GID)) + inode->i_gid = ei->attr.gid; +} + +static int eventfs_get_attr(struct mnt_idmap *idmap, + const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) +{ + struct dentry *dentry = path->dentry; + struct inode *inode = d_backing_inode(dentry); + + set_top_events_ownership(inode); + + generic_fillattr(idmap, request_mask, inode, stat); + return 0; +} + +static int eventfs_permission(struct mnt_idmap *idmap, + struct inode *inode, int mask) +{ + set_top_events_ownership(inode); + return generic_permission(idmap, inode, mask); +} + static const struct inode_operations eventfs_root_dir_inode_operations = { .lookup = eventfs_root_lookup, .setattr 
= eventfs_set_attr, + .getattr = eventfs_get_attr, + .permission = eventfs_permission, }; static const struct inode_operations eventfs_file_inode_operations = { @@ -148,11 +223,9 @@ static const struct inode_operations eventfs_file_inode_operations = { }; static const struct file_operations eventfs_file_operations = { - .open = dcache_dir_open_wrapper, .read = generic_read_dir, - .iterate_shared = dcache_readdir_wrapper, + .iterate_shared = eventfs_iterate, .llseek = generic_file_llseek, - .release = eventfs_release, }; /* Return the evenfs_inode of the "events" directory */ @@ -178,6 +251,8 @@ static struct eventfs_inode *eventfs_find_events(struct dentry *dentry) } while (!ei->is_events); mutex_unlock(&eventfs_mutex); + update_top_events_attr(ei, dentry); + return ei; } @@ -289,6 +364,9 @@ static struct dentry *create_file(const char *name, umode_t mode, inode->i_fop = fop; inode->i_private = data; + /* All files will have the same inode number */ + inode->i_ino = EVENTFS_FILE_INODE_INO; + ti = get_tracefs(inode); ti->flags |= TRACEFS_EVENT_INODE; d_instantiate(dentry, inode); @@ -325,6 +403,9 @@ static struct dentry *create_dir(struct eventfs_inode *ei, struct dentry *parent inode->i_op = &eventfs_root_dir_inode_operations; inode->i_fop = &eventfs_file_operations; + /* All directories will have the same inode number */ + inode->i_ino = eventfs_dir_ino(ei); + ti = get_tracefs(inode); ti->flags |= TRACEFS_EVENT_INODE; @@ -390,16 +471,14 @@ void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry) * @mode: The mode of the file. * @data: The data to use to set the inode of the file with on open() * @fops: The fops of the file to be created. - * @lookup: If called by the lookup routine, in which case, dput() the created dentry. * * Create a dentry for a file of an eventfs_inode @ei and place it into the - * address located at @e_dentry. If the @e_dentry already has a dentry, then - * just do a dget() on it and return. 
Otherwise create the dentry and attach it. + * address located at @e_dentry. */ static struct dentry * create_file_dentry(struct eventfs_inode *ei, int idx, struct dentry *parent, const char *name, umode_t mode, void *data, - const struct file_operations *fops, bool lookup) + const struct file_operations *fops) { struct eventfs_attr *attr = NULL; struct dentry **e_dentry = &ei->d_children[idx]; @@ -414,9 +493,7 @@ create_file_dentry(struct eventfs_inode *ei, int idx, } /* If the e_dentry already has a dentry, use it */ if (*e_dentry) { - /* lookup does not need to up the ref count */ - if (!lookup) - dget(*e_dentry); + dget(*e_dentry); mutex_unlock(&eventfs_mutex); return *e_dentry; } @@ -441,13 +518,12 @@ create_file_dentry(struct eventfs_inode *ei, int idx, * way to being freed, don't return it. If e_dentry is NULL * it means it was already freed. */ - if (ei->is_freed) + if (ei->is_freed) { dentry = NULL; - else + } else { dentry = *e_dentry; - /* The lookup does not need to up the dentry refcount */ - if (dentry && !lookup) dget(dentry); + } mutex_unlock(&eventfs_mutex); return dentry; } @@ -465,9 +541,6 @@ create_file_dentry(struct eventfs_inode *ei, int idx, } mutex_unlock(&eventfs_mutex); - if (lookup) - dput(dentry); - return dentry; } @@ -500,13 +573,12 @@ static void eventfs_post_create_dir(struct eventfs_inode *ei) * @pei: The eventfs_inode parent of ei. * @ei: The eventfs_inode to create the directory for * @parent: The dentry of the parent of this directory - * @lookup: True if this is called by the lookup code * * This creates and attaches a directory dentry to the eventfs_inode @ei. 
*/ static struct dentry * create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei, - struct dentry *parent, bool lookup) + struct dentry *parent) { struct dentry *dentry = NULL; @@ -518,11 +590,9 @@ create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei, return NULL; } if (ei->dentry) { - /* If the dentry already has a dentry, use it */ + /* If the eventfs_inode already has a dentry, use it */ dentry = ei->dentry; - /* lookup does not need to up the ref count */ - if (!lookup) - dget(dentry); + dget(dentry); mutex_unlock(&eventfs_mutex); return dentry; } @@ -542,7 +612,7 @@ create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei, * way to being freed. */ dentry = ei->dentry; - if (dentry && !lookup) + if (dentry) dget(dentry); mutex_unlock(&eventfs_mutex); return dentry; @@ -562,9 +632,6 @@ create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei, } mutex_unlock(&eventfs_mutex); - if (lookup) - dput(dentry); - return dentry; } @@ -589,8 +656,8 @@ static struct dentry *eventfs_root_lookup(struct inode *dir, struct eventfs_inode *ei; struct dentry *ei_dentry = NULL; struct dentry *ret = NULL; + struct dentry *d; const char *name = dentry->d_name.name; - bool created = false; umode_t mode; void *data; int idx; @@ -626,13 +693,10 @@ static struct dentry *eventfs_root_lookup(struct inode *dir, ret = simple_lookup(dir, dentry, flags); if (IS_ERR(ret)) goto out; - create_dir_dentry(ei, ei_child, ei_dentry, true); - created = true; - break; - } - - if (created) + d = create_dir_dentry(ei, ei_child, ei_dentry); + dput(d); goto out; + } for (i = 0; i < ei->nr_entries; i++) { entry = &ei->entries[i]; @@ -650,8 +714,8 @@ static struct dentry *eventfs_root_lookup(struct inode *dir, ret = simple_lookup(dir, dentry, flags); if (IS_ERR(ret)) goto out; - create_file_dentry(ei, i, ei_dentry, name, mode, cdata, - fops, true); + d = create_file_dentry(ei, i, ei_dentry, name, mode, cdata, fops); + dput(d); break; } } @@ -660,172 
+724,107 @@ static struct dentry *eventfs_root_lookup(struct inode *dir, return ret; } -struct dentry_list { - void *cursor; - struct dentry **dentries; -}; - -/** - * eventfs_release - called to release eventfs file/dir - * @inode: inode to be released - * @file: file to be released (not used) - */ -static int eventfs_release(struct inode *inode, struct file *file) -{ - struct tracefs_inode *ti; - struct dentry_list *dlist = file->private_data; - void *cursor; - int i; - - ti = get_tracefs(inode); - if (!(ti->flags & TRACEFS_EVENT_INODE)) - return -EINVAL; - - if (WARN_ON_ONCE(!dlist)) - return -EINVAL; - - for (i = 0; dlist->dentries && dlist->dentries[i]; i++) { - dput(dlist->dentries[i]); - } - - cursor = dlist->cursor; - kfree(dlist->dentries); - kfree(dlist); - file->private_data = cursor; - return dcache_dir_close(inode, file); -} - -static int add_dentries(struct dentry ***dentries, struct dentry *d, int cnt) -{ - struct dentry **tmp; - - tmp = krealloc(*dentries, sizeof(d) * (cnt + 2), GFP_NOFS); - if (!tmp) - return -1; - tmp[cnt] = d; - tmp[cnt + 1] = NULL; - *dentries = tmp; - return 0; -} - -/** - * dcache_dir_open_wrapper - eventfs open wrapper - * @inode: not used - * @file: dir to be opened (to create it's children) - * - * Used to dynamic create file/dir with-in @file, all the - * file/dir will be created. If already created then references - * will be increased +/* + * Walk the children of a eventfs_inode to fill in getdents(). 
*/ -static int dcache_dir_open_wrapper(struct inode *inode, struct file *file) +static int eventfs_iterate(struct file *file, struct dir_context *ctx) { const struct file_operations *fops; + struct inode *f_inode = file_inode(file); const struct eventfs_entry *entry; struct eventfs_inode *ei_child; struct tracefs_inode *ti; struct eventfs_inode *ei; - struct dentry_list *dlist; - struct dentry **dentries = NULL; - struct dentry *parent = file_dentry(file); - struct dentry *d; - struct inode *f_inode = file_inode(file); - const char *name = parent->d_name.name; + const char *name; umode_t mode; - void *data; - int cnt = 0; int idx; - int ret; - int i; - int r; + int ret = -EINVAL; + int ino; + int i, r, c; + + if (!dir_emit_dots(file, ctx)) + return 0; ti = get_tracefs(f_inode); if (!(ti->flags & TRACEFS_EVENT_INODE)) return -EINVAL; - if (WARN_ON_ONCE(file->private_data)) - return -EINVAL; + c = ctx->pos - 2; idx = srcu_read_lock(&eventfs_srcu); mutex_lock(&eventfs_mutex); ei = READ_ONCE(ti->private); + if (ei && ei->is_freed) + ei = NULL; mutex_unlock(&eventfs_mutex); - if (!ei) { - srcu_read_unlock(&eventfs_srcu, idx); - return -EINVAL; - } - - - data = ei->data; + if (!ei) + goto out; - dlist = kmalloc(sizeof(*dlist), GFP_KERNEL); - if (!dlist) { - srcu_read_unlock(&eventfs_srcu, idx); - return -ENOMEM; - } + /* + * Need to create the dentries and inodes to have a consistent + * inode number. 
+ */ + ret = 0; - inode_lock(parent->d_inode); - list_for_each_entry_srcu(ei_child, &ei->children, list, - srcu_read_lock_held(&eventfs_srcu)) { - d = create_dir_dentry(ei, ei_child, parent, false); - if (d) { - ret = add_dentries(&dentries, d, cnt); - if (ret < 0) - break; - cnt++; - } - } + /* Start at 'c' to jump over already read entries */ + for (i = c; i < ei->nr_entries; i++, ctx->pos++) { + void *cdata = ei->data; - for (i = 0; i < ei->nr_entries; i++) { - void *cdata = data; entry = &ei->entries[i]; name = entry->name; + mutex_lock(&eventfs_mutex); - /* If ei->is_freed, then the event itself may be too */ - if (!ei->is_freed) - r = entry->callback(name, &mode, &cdata, &fops); - else - r = -1; + /* If ei->is_freed then just bail here, nothing more to do */ + if (ei->is_freed) { + mutex_unlock(&eventfs_mutex); + goto out; + } + r = entry->callback(name, &mode, &cdata, &fops); mutex_unlock(&eventfs_mutex); if (r <= 0) continue; - d = create_file_dentry(ei, i, parent, name, mode, cdata, fops, false); - if (d) { - ret = add_dentries(&dentries, d, cnt); - if (ret < 0) - break; - cnt++; + + ino = EVENTFS_FILE_INODE_INO; + + if (!dir_emit(ctx, name, strlen(name), ino, DT_REG)) + goto out; + } + + /* Subtract the skipped entries above */ + c -= min((unsigned int)c, (unsigned int)ei->nr_entries); + + list_for_each_entry_srcu(ei_child, &ei->children, list, + srcu_read_lock_held(&eventfs_srcu)) { + + if (c > 0) { + c--; + continue; } + + ctx->pos++; + + if (ei_child->is_freed) + continue; + + name = ei_child->name; + + ino = eventfs_dir_ino(ei_child); + + if (!dir_emit(ctx, name, strlen(name), ino, DT_DIR)) + goto out_dec; } - inode_unlock(parent->d_inode); + ret = 1; + out: srcu_read_unlock(&eventfs_srcu, idx); - ret = dcache_dir_open(inode, file); - /* - * dcache_dir_open() sets file->private_data to a dentry cursor. - * Need to save that but also save all the dentries that were - * opened by this function. 
- */ - dlist->cursor = file->private_data; - dlist->dentries = dentries; - file->private_data = dlist; return ret; -} - -/* - * This just sets the file->private_data back to the cursor and back. - */ -static int dcache_readdir_wrapper(struct file *file, struct dir_context *ctx) -{ - struct dentry_list *dlist = file->private_data; - int ret; - file->private_data = dlist->cursor; - ret = dcache_readdir(file, ctx); - dlist->cursor = file->private_data; - file->private_data = dlist; - return ret; + out_dec: + /* Incremented ctx->pos without adding something, reset it */ + ctx->pos--; + goto out; } /** @@ -883,7 +882,7 @@ struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode } if (size) { - ei->d_children = kzalloc(sizeof(*ei->d_children) * size, GFP_KERNEL); + ei->d_children = kcalloc(size, sizeof(*ei->d_children), GFP_KERNEL); if (!ei->d_children) { kfree_const(ei->name); kfree(ei); @@ -950,7 +949,7 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry goto fail; if (size) { - ei->d_children = kzalloc(sizeof(*ei->d_children) * size, GFP_KERNEL); + ei->d_children = kcalloc(size, sizeof(*ei->d_children), GFP_KERNEL); if (!ei->d_children) goto fail; } @@ -968,6 +967,14 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry uid = d_inode(dentry->d_parent)->i_uid; gid = d_inode(dentry->d_parent)->i_gid; + /* + * If the events directory is of the top instance, then parent + * is NULL. Set the attr.mode to reflect this and its permissions will + * default to the tracefs root dentry. 
+ */ + if (!parent) + ei->attr.mode = EVENTFS_TOPLEVEL; + /* This is used as the default ownership of the files and directories */ ei->attr.uid = uid; ei->attr.gid = gid; diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index ad20e6af938d9b68df7b27e08d44a5351f0d977e..e1b172c0e091a8d55fcc80951fa4ed5202b1539e 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -91,6 +91,7 @@ static int tracefs_syscall_mkdir(struct mnt_idmap *idmap, struct inode *inode, struct dentry *dentry, umode_t mode) { + struct tracefs_inode *ti; char *name; int ret; @@ -98,6 +99,15 @@ static int tracefs_syscall_mkdir(struct mnt_idmap *idmap, if (!name) return -ENOMEM; + /* + * This is a new directory that does not take the default of + * the rootfs. It becomes the default permissions for all the + * files and directories underneath it. + */ + ti = get_tracefs(inode); + ti->flags |= TRACEFS_INSTANCE_INODE; + ti->private = inode; + /* * The mkdir call can call the generic functions that create * the files within the tracefs system. It is up to the individual @@ -141,10 +151,76 @@ static int tracefs_syscall_rmdir(struct inode *inode, struct dentry *dentry) return ret; } -static const struct inode_operations tracefs_dir_inode_operations = { +static void set_tracefs_inode_owner(struct inode *inode) +{ + struct tracefs_inode *ti = get_tracefs(inode); + struct inode *root_inode = ti->private; + + /* + * If this inode has never been referenced, then update + * the permissions to the superblock. 
+ */ + if (!(ti->flags & TRACEFS_UID_PERM_SET)) + inode->i_uid = root_inode->i_uid; + + if (!(ti->flags & TRACEFS_GID_PERM_SET)) + inode->i_gid = root_inode->i_gid; +} + +static int tracefs_permission(struct mnt_idmap *idmap, + struct inode *inode, int mask) +{ + set_tracefs_inode_owner(inode); + return generic_permission(idmap, inode, mask); +} + +static int tracefs_getattr(struct mnt_idmap *idmap, + const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) +{ + struct inode *inode = d_backing_inode(path->dentry); + + set_tracefs_inode_owner(inode); + generic_fillattr(idmap, request_mask, inode, stat); + return 0; +} + +static int tracefs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, + struct iattr *attr) +{ + unsigned int ia_valid = attr->ia_valid; + struct inode *inode = d_inode(dentry); + struct tracefs_inode *ti = get_tracefs(inode); + + if (ia_valid & ATTR_UID) + ti->flags |= TRACEFS_UID_PERM_SET; + + if (ia_valid & ATTR_GID) + ti->flags |= TRACEFS_GID_PERM_SET; + + return simple_setattr(idmap, dentry, attr); +} + +static const struct inode_operations tracefs_instance_dir_inode_operations = { .lookup = simple_lookup, .mkdir = tracefs_syscall_mkdir, .rmdir = tracefs_syscall_rmdir, + .permission = tracefs_permission, + .getattr = tracefs_getattr, + .setattr = tracefs_setattr, +}; + +static const struct inode_operations tracefs_dir_inode_operations = { + .lookup = simple_lookup, + .permission = tracefs_permission, + .getattr = tracefs_getattr, + .setattr = tracefs_setattr, +}; + +static const struct inode_operations tracefs_file_inode_operations = { + .permission = tracefs_permission, + .getattr = tracefs_getattr, + .setattr = tracefs_setattr, }; struct inode *tracefs_get_inode(struct super_block *sb) @@ -183,82 +259,6 @@ struct tracefs_fs_info { struct tracefs_mount_opts mount_opts; }; -static void change_gid(struct dentry *dentry, kgid_t gid) -{ - if (!dentry->d_inode) - return; - dentry->d_inode->i_gid = gid; -} - -/* - * 
Taken from d_walk, but without he need for handling renames. - * Nothing can be renamed while walking the list, as tracefs - * does not support renames. This is only called when mounting - * or remounting the file system, to set all the files to - * the given gid. - */ -static void set_gid(struct dentry *parent, kgid_t gid) -{ - struct dentry *this_parent, *dentry; - - this_parent = parent; - spin_lock(&this_parent->d_lock); - - change_gid(this_parent, gid); -repeat: - dentry = d_first_child(this_parent); -resume: - hlist_for_each_entry_from(dentry, d_sib) { - struct tracefs_inode *ti; - - /* Note, getdents() can add a cursor dentry with no inode */ - if (!dentry->d_inode) - continue; - - spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); - - change_gid(dentry, gid); - - /* If this is the events directory, update that too */ - ti = get_tracefs(dentry->d_inode); - if (ti && (ti->flags & TRACEFS_EVENT_INODE)) - eventfs_update_gid(dentry, gid); - - if (!hlist_empty(&dentry->d_children)) { - spin_unlock(&this_parent->d_lock); - spin_release(&dentry->d_lock.dep_map, _RET_IP_); - this_parent = dentry; - spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_); - goto repeat; - } - spin_unlock(&dentry->d_lock); - } - /* - * All done at this level ... ascend and resume the search. 
- */ - rcu_read_lock(); -ascend: - if (this_parent != parent) { - dentry = this_parent; - this_parent = dentry->d_parent; - - spin_unlock(&dentry->d_lock); - spin_lock(&this_parent->d_lock); - - /* go into the first sibling still alive */ - hlist_for_each_entry_continue(dentry, d_sib) { - if (likely(!(dentry->d_flags & DCACHE_DENTRY_KILLED))) { - rcu_read_unlock(); - goto resume; - } - } - goto ascend; - } - rcu_read_unlock(); - spin_unlock(&this_parent->d_lock); - return; -} - static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts) { substring_t args[MAX_OPT_ARGS]; @@ -331,10 +331,8 @@ static int tracefs_apply_options(struct super_block *sb, bool remount) if (!remount || opts->opts & BIT(Opt_uid)) inode->i_uid = opts->uid; - if (!remount || opts->opts & BIT(Opt_gid)) { - /* Set all the group ids to the mount option */ - set_gid(sb->s_root, opts->gid); - } + if (!remount || opts->opts & BIT(Opt_gid)) + inode->i_gid = opts->gid; return 0; } @@ -568,6 +566,26 @@ struct dentry *eventfs_end_creating(struct dentry *dentry) return dentry; } +/* Find the inode that this will use for default */ +static struct inode *instance_inode(struct dentry *parent, struct inode *inode) +{ + struct tracefs_inode *ti; + + /* If parent is NULL then use root inode */ + if (!parent) + return d_inode(inode->i_sb->s_root); + + /* Find the inode that is flagged as an instance or the root inode */ + while (!IS_ROOT(parent)) { + ti = get_tracefs(d_inode(parent)); + if (ti->flags & TRACEFS_INSTANCE_INODE) + break; + parent = parent->d_parent; + } + + return d_inode(parent); +} + /** * tracefs_create_file - create a file in the tracefs filesystem * @name: a pointer to a string containing the name of the file to create. 
@@ -598,6 +616,7 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops) { + struct tracefs_inode *ti; struct dentry *dentry; struct inode *inode; @@ -616,7 +635,11 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode, if (unlikely(!inode)) return tracefs_failed_creating(dentry); + ti = get_tracefs(inode); + ti->private = instance_inode(parent, inode); + inode->i_mode = mode; + inode->i_op = &tracefs_file_inode_operations; inode->i_fop = fops ? fops : &tracefs_file_operations; inode->i_private = data; inode->i_uid = d_inode(dentry->d_parent)->i_uid; @@ -629,6 +652,7 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode, static struct dentry *__create_dir(const char *name, struct dentry *parent, const struct inode_operations *ops) { + struct tracefs_inode *ti; struct dentry *dentry = tracefs_start_creating(name, parent); struct inode *inode; @@ -646,6 +670,9 @@ static struct dentry *__create_dir(const char *name, struct dentry *parent, inode->i_uid = d_inode(dentry->d_parent)->i_uid; inode->i_gid = d_inode(dentry->d_parent)->i_gid; + ti = get_tracefs(inode); + ti->private = instance_inode(parent, inode); + /* directory inodes start off with i_nlink == 2 (for "." 
entry) */ inc_nlink(inode); d_instantiate(dentry, inode); @@ -676,7 +703,7 @@ struct dentry *tracefs_create_dir(const char *name, struct dentry *parent) if (security_locked_down(LOCKDOWN_TRACEFS)) return NULL; - return __create_dir(name, parent, &simple_dir_inode_operations); + return __create_dir(name, parent, &tracefs_dir_inode_operations); } /** @@ -707,7 +734,7 @@ __init struct dentry *tracefs_create_instance_dir(const char *name, if (WARN_ON(tracefs_ops.mkdir || tracefs_ops.rmdir)) return NULL; - dentry = __create_dir(name, parent, &tracefs_dir_inode_operations); + dentry = __create_dir(name, parent, &tracefs_instance_dir_inode_operations); if (!dentry) return NULL; diff --git a/fs/tracefs/internal.h b/fs/tracefs/internal.h index 42bdeb471a0720c5d96b1830ecf2ec75206e2ae4..45397df9bb65bffb783329c156e03a2fdb01ebea 100644 --- a/fs/tracefs/internal.h +++ b/fs/tracefs/internal.h @@ -5,6 +5,9 @@ enum { TRACEFS_EVENT_INODE = BIT(1), TRACEFS_EVENT_TOP_INODE = BIT(2), + TRACEFS_GID_PERM_SET = BIT(3), + TRACEFS_UID_PERM_SET = BIT(4), + TRACEFS_INSTANCE_INODE = BIT(5), }; struct tracefs_inode { @@ -52,6 +55,10 @@ struct eventfs_inode { struct eventfs_attr *entry_attrs; struct eventfs_attr attr; void *data; + unsigned int is_freed:1; + unsigned int is_events:1; + unsigned int nr_entries:30; + unsigned int ino; /* * Union - used for deletion * @llist: for calling dput() if needed after RCU @@ -61,9 +68,6 @@ struct eventfs_inode { struct llist_node llist; struct rcu_head rcu; }; - unsigned int is_freed:1; - unsigned int is_events:1; - unsigned int nr_entries:30; }; static inline struct tracefs_inode *get_tracefs(const struct inode *inode) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 98aaca933bddb76f62b7927576f5d560ebd61c84..f362345467facd57cc314547142e1093b4e54983 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -3277,7 +3277,7 @@ xfs_bmap_alloc_account( struct xfs_bmalloca *ap) { bool isrt = XFS_IS_REALTIME_INODE(ap->ip) && - 
(ap->flags & XFS_BMAPI_ATTRFORK); + !(ap->flags & XFS_BMAPI_ATTRFORK); uint fld; if (ap->flags & XFS_BMAPI_COWFORK) { diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 2b3ae51f950df0639f35a17450a3d9b2a7ce9be6..e4d24d3f9abb5e20fcf24f3b6bf88c8d6e632108 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -719,6 +719,8 @@ struct acpi_pci_root { /* helper */ +struct iommu_ops; + bool acpi_dma_supported(const struct acpi_device *adev); enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev); int acpi_iommu_fwspec_init(struct device *dev, u32 id, diff --git a/include/asm-generic/cacheflush.h b/include/asm-generic/cacheflush.h index 84ec53ccc450296f264cad5952b4cc26e73dad40..7ee8a179d1036e1d8010b8b18a8f3022e41c1695 100644 --- a/include/asm-generic/cacheflush.h +++ b/include/asm-generic/cacheflush.h @@ -91,6 +91,12 @@ static inline void flush_cache_vmap(unsigned long start, unsigned long end) } #endif +#ifndef flush_cache_vmap_early +static inline void flush_cache_vmap_early(unsigned long start, unsigned long end) +{ +} +#endif + #ifndef flush_cache_vunmap static inline void flush_cache_vunmap(unsigned long start, unsigned long end) { diff --git a/include/asm-generic/checksum.h b/include/asm-generic/checksum.h index 43e18db89c1439fc3aed1ed6a29a003fa66b899d..ad928cce268b40bcf09566c708e16a432f7c7c6d 100644 --- a/include/asm-generic/checksum.h +++ b/include/asm-generic/checksum.h @@ -2,6 +2,8 @@ #ifndef __ASM_GENERIC_CHECKSUM_H #define __ASM_GENERIC_CHECKSUM_H +#include + /* * computes the checksum of a memory block at buff, length len, * and adds in "sum" (32-bit) @@ -31,9 +33,7 @@ extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl); static inline __sum16 csum_fold(__wsum csum) { u32 sum = (__force u32)csum; - sum = (sum & 0xffff) + (sum >> 16); - sum = (sum & 0xffff) + (sum >> 16); - return (__force __sum16)~sum; + return (__force __sum16)((~sum - ror32(sum, 16)) >> 16); } #endif diff --git 
a/include/dt-bindings/dma/fsl-edma.h b/include/dt-bindings/dma/fsl-edma.h new file mode 100644 index 0000000000000000000000000000000000000000..fd11478cfe9cc27370f0645fbf21164108271129 --- /dev/null +++ b/include/dt-bindings/dma/fsl-edma.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ + +#ifndef _FSL_EDMA_DT_BINDING_H_ +#define _FSL_EDMA_DT_BINDING_H_ + +/* Receive Channel */ +#define FSL_EDMA_RX 0x1 + +/* iMX8 audio remote DMA */ +#define FSL_EDMA_REMOTE 0x2 + +/* FIFO is continue memory region */ +#define FSL_EDMA_MULTI_FIFO 0x4 + +/* Channel need stick to even channel */ +#define FSL_EDMA_EVEN_CH 0x8 + +/* Channel need stick to odd channel */ +#define FSL_EDMA_ODD_CH 0x10 + +#endif diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 118a18b7ff844a357cba99eeb5272bbdc2165f7d..b7165e52b3c687bde0d295e7094cff66f5aebd48 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -15,6 +15,7 @@ #include #include #include +#include struct irq_domain; struct irq_domain_ops; @@ -431,6 +432,16 @@ int thermal_acpi_hot_trip_temp(struct acpi_device *adev, int *ret_temp); int thermal_acpi_critical_trip_temp(struct acpi_device *adev, int *ret_temp); #endif +#ifdef CONFIG_ACPI_HMAT +int acpi_get_genport_coordinates(u32 uid, struct access_coordinate *coord); +#else +static inline int acpi_get_genport_coordinates(u32 uid, + struct access_coordinate *coord) +{ + return -EOPNOTSUPP; +} +#endif + #ifdef CONFIG_ACPI_NUMA int acpi_map_pxm_to_node(int pxm); int acpi_get_node(acpi_handle handle); diff --git a/include/linux/bio.h b/include/linux/bio.h index ec4db73e5f4ec42409c38d228dcf3a9d9c42c184..875d792bffff827aa2f489a7aa1b631810750b10 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -286,6 +286,11 @@ static inline void bio_first_folio(struct folio_iter *fi, struct bio *bio, { struct bio_vec *bvec = bio_first_bvec_all(bio) + i; + if (unlikely(i >= bio->bi_vcnt)) { + fi->folio = NULL; + return; + } + fi->folio = 
page_folio(bvec->bv_page); fi->offset = bvec->bv_offset + PAGE_SIZE * (bvec->bv_page - &fi->folio->page); @@ -303,10 +308,8 @@ static inline void bio_next_folio(struct folio_iter *fi, struct bio *bio) fi->offset = 0; fi->length = min(folio_size(fi->folio), fi->_seg_count); fi->_next = folio_next(fi->folio); - } else if (fi->_i + 1 < bio->bi_vcnt) { - bio_first_folio(fi, bio, fi->_i + 1); } else { - fi->folio = NULL; + bio_first_folio(fi, bio, fi->_i + 1); } } diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index a676e116085f331ed8bb03208ce7f744a8376f21..7a8150a5f051339f680b9df83fa78da48b8c8af1 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -391,9 +391,6 @@ struct blk_mq_hw_ctx { */ struct blk_mq_tags *sched_tags; - /** @run: Number of dispatched requests. */ - unsigned long run; - /** @numa_node: NUMA node the storage adapter has been connected to. */ unsigned int numa_node; /** @queue_num: Index of this hardware queue. */ diff --git a/include/linux/btf.h b/include/linux/btf.h index 59d404e22814e885ae8429a361f2f43bed3b8f30..cf5c6ff489812e1cabe102f89a69c561319f6e99 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -512,7 +512,7 @@ s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id); int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_cnt, struct module *owner); struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf, u32 btf_id); -const struct btf_member * +const struct btf_type * btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, const struct btf_type *t, enum bpf_prog_type prog_type, int arg); diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index b8610e9d2471f5a7928e8d1b62a418e491ea575d..fa018d5864e7422c522194c16ff45a8dd0db1376 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -572,9 +572,12 @@ int __ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt); */ #define 
CEPH_SPARSE_EXT_ARRAY_INITIAL 16 -static inline int ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op) +static inline int ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt) { - return __ceph_alloc_sparse_ext_map(op, CEPH_SPARSE_EXT_ARRAY_INITIAL); + if (!cnt) + cnt = CEPH_SPARSE_EXT_ARRAY_INITIAL; + + return __ceph_alloc_sparse_ext_map(op, cnt); } extern void ceph_osdc_get_request(struct ceph_osd_request *req); diff --git a/include/linux/cxl-event.h b/include/linux/cxl-event.h new file mode 100644 index 0000000000000000000000000000000000000000..91125eca4c8ab8ded08a5b4b687c65c69d656401 --- /dev/null +++ b/include/linux/cxl-event.h @@ -0,0 +1,161 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2023 Intel Corporation. */ +#ifndef _LINUX_CXL_EVENT_H +#define _LINUX_CXL_EVENT_H + +/* + * Common Event Record Format + * CXL rev 3.0 section 8.2.9.2.1; Table 8-42 + */ +struct cxl_event_record_hdr { + u8 length; + u8 flags[3]; + __le16 handle; + __le16 related_handle; + __le64 timestamp; + u8 maint_op_class; + u8 reserved[15]; +} __packed; + +#define CXL_EVENT_RECORD_DATA_LENGTH 0x50 +struct cxl_event_generic { + struct cxl_event_record_hdr hdr; + u8 data[CXL_EVENT_RECORD_DATA_LENGTH]; +} __packed; + +/* + * General Media Event Record + * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43 + */ +#define CXL_EVENT_GEN_MED_COMP_ID_SIZE 0x10 +struct cxl_event_gen_media { + struct cxl_event_record_hdr hdr; + __le64 phys_addr; + u8 descriptor; + u8 type; + u8 transaction_type; + u8 validity_flags[2]; + u8 channel; + u8 rank; + u8 device[3]; + u8 component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE]; + u8 reserved[46]; +} __packed; + +/* + * DRAM Event Record - DER + * CXL rev 3.0 section 8.2.9.2.1.2; Table 3-44 + */ +#define CXL_EVENT_DER_CORRECTION_MASK_SIZE 0x20 +struct cxl_event_dram { + struct cxl_event_record_hdr hdr; + __le64 phys_addr; + u8 descriptor; + u8 type; + u8 transaction_type; + u8 validity_flags[2]; + u8 channel; + u8 rank; + u8 nibble_mask[3]; + u8 
bank_group; + u8 bank; + u8 row[3]; + u8 column[2]; + u8 correction_mask[CXL_EVENT_DER_CORRECTION_MASK_SIZE]; + u8 reserved[0x17]; +} __packed; + +/* + * Get Health Info Record + * CXL rev 3.0 section 8.2.9.8.3.1; Table 8-100 + */ +struct cxl_get_health_info { + u8 health_status; + u8 media_status; + u8 add_status; + u8 life_used; + u8 device_temp[2]; + u8 dirty_shutdown_cnt[4]; + u8 cor_vol_err_cnt[4]; + u8 cor_per_err_cnt[4]; +} __packed; + +/* + * Memory Module Event Record + * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45 + */ +struct cxl_event_mem_module { + struct cxl_event_record_hdr hdr; + u8 event_type; + struct cxl_get_health_info info; + u8 reserved[0x3d]; +} __packed; + +union cxl_event { + struct cxl_event_generic generic; + struct cxl_event_gen_media gen_media; + struct cxl_event_dram dram; + struct cxl_event_mem_module mem_module; +} __packed; + +/* + * Common Event Record Format; in event logs + * CXL rev 3.0 section 8.2.9.2.1; Table 8-42 + */ +struct cxl_event_record_raw { + uuid_t id; + union cxl_event event; +} __packed; + +enum cxl_event_type { + CXL_CPER_EVENT_GENERIC, + CXL_CPER_EVENT_GEN_MEDIA, + CXL_CPER_EVENT_DRAM, + CXL_CPER_EVENT_MEM_MODULE, +}; + +#define CPER_CXL_DEVICE_ID_VALID BIT(0) +#define CPER_CXL_DEVICE_SN_VALID BIT(1) +#define CPER_CXL_COMP_EVENT_LOG_VALID BIT(2) +struct cxl_cper_event_rec { + struct { + u32 length; + u64 validation_bits; + struct cper_cxl_event_devid { + u16 vendor_id; + u16 device_id; + u8 func_num; + u8 device_num; + u8 bus_num; + u16 segment_num; + u16 slot_num; /* bits 2:0 reserved */ + u8 reserved; + } __packed device_id; + struct cper_cxl_event_sn { + u32 lower_dw; + u32 upper_dw; + } __packed dev_serial_num; + } __packed hdr; + + union cxl_event event; +} __packed; + +typedef void (*cxl_cper_callback)(enum cxl_event_type type, + struct cxl_cper_event_rec *rec); + +#ifdef CONFIG_ACPI_APEI_GHES +int cxl_cper_register_callback(cxl_cper_callback callback); +int cxl_cper_unregister_callback(cxl_cper_callback 
callback); +#else +static inline int cxl_cper_register_callback(cxl_cper_callback callback) +{ + return 0; +} + +static inline int cxl_cper_unregister_callback(cxl_cper_callback callback) +{ + return 0; +} +#endif + +#endif /* _LINUX_CXL_EVENT_H */ diff --git a/include/linux/device.h b/include/linux/device.h index 58af8307c7ddd8d345f070d9a65011797f72324c..97c4b046c09d9464243c81f294724985dc4a292a 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -42,7 +42,6 @@ struct class; struct subsys_private; struct device_node; struct fwnode_handle; -struct iommu_ops; struct iommu_group; struct dev_pin_info; struct dev_iommu; diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index 25127f7503494588e1f4f9d42bf84c344342c7c8..5ef4ec1c36c3b9d7d8e514cf0f46ac2f14bc66f6 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -62,9 +62,6 @@ struct fwnode_handle; * this bus. * @pm: Power management operations of this bus, callback the specific * device driver's pm-ops. - * @iommu_ops: IOMMU specific operations for this bus, used to attach IOMMU - * driver implementations to a bus and allow the driver to do - * bus-specific setup * @need_parent_lock: When probing or removing a device on this bus, the * device core should lock the device's parent. 
* @@ -104,8 +101,6 @@ struct bus_type { const struct dev_pm_ops *pm; - const struct iommu_ops *iommu_ops; - bool need_parent_lock; }; diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index e401f824a007fe3b7b3ce8375ab5b9d0404f3bbc..4abc60f04209281bf8af6905c4ec3d3bb6b531b5 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -11,6 +11,7 @@ #include struct cma; +struct iommu_ops; /* * Values for struct dma_map_ops.flags: @@ -426,10 +427,10 @@ bool arch_dma_unmap_sg_direct(struct device *dev, struct scatterlist *sg, #ifdef CONFIG_ARCH_HAS_SETUP_DMA_OPS void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - const struct iommu_ops *iommu, bool coherent); + bool coherent); #else static inline void arch_setup_dma_ops(struct device *dev, u64 dma_base, - u64 size, const struct iommu_ops *iommu, bool coherent) + u64 size, bool coherent) { } #endif /* CONFIG_ARCH_HAS_SETUP_DMA_OPS */ diff --git a/include/linux/export.h b/include/linux/export.h index 9911508a9604fb048c2587730520331ac621fd80..0bbd02fd351db9239cfd39709e945c649582fb52 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -6,15 +6,6 @@ #include #include -/* - * Export symbols from the kernel to modules. Forked from module.h - * to reduce the amount of pointless cruft we feed to gcc when only - * exporting a simple symbol or two. - * - * Try not to add #includes here. It slows compilation and makes kernel - * hackers place grumpy comments in header files. - */ - /* * This comment block is used by fixdep. Please do not remove. * @@ -23,15 +14,6 @@ * side effect of the *.o build rule. 
*/ -#ifndef __ASSEMBLY__ -#ifdef MODULE -extern struct module __this_module; -#define THIS_MODULE (&__this_module) -#else -#define THIS_MODULE ((struct module *)0) -#endif -#endif /* __ASSEMBLY__ */ - #ifdef CONFIG_64BIT #define __EXPORT_SYMBOL_REF(sym) \ .balign 8 ASM_NL \ diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h index 79ef6ac4c02113e92454d94e80565b06073c4722..89a6888f2f9e502d38f09e6a1c66f0697c3d7d08 100644 --- a/include/linux/fortify-string.h +++ b/include/linux/fortify-string.h @@ -214,51 +214,6 @@ __kernel_size_t __fortify_strlen(const char * const POS p) return ret; } -/* Defined after fortified strlen() to reuse it. */ -extern size_t __real_strlcpy(char *, const char *, size_t) __RENAME(strlcpy); -/** - * strlcpy - Copy a string into another string buffer - * - * @p: pointer to destination of copy - * @q: pointer to NUL-terminated source string to copy - * @size: maximum number of bytes to write at @p - * - * If strlen(@q) >= @size, the copy of @q will be truncated at - * @size - 1 bytes. @p will always be NUL-terminated. - * - * Do not use this function. While FORTIFY_SOURCE tries to avoid - * over-reads when calculating strlen(@q), it is still possible. - * Prefer strscpy(), though note its different return values for - * detecting truncation. - * - * Returns total number of bytes written to @p, including terminating NUL. - * - */ -__FORTIFY_INLINE size_t strlcpy(char * const POS p, const char * const POS q, size_t size) -{ - const size_t p_size = __member_size(p); - const size_t q_size = __member_size(q); - size_t q_len; /* Full count of source string length. */ - size_t len; /* Count of characters going into destination. */ - - if (p_size == SIZE_MAX && q_size == SIZE_MAX) - return __real_strlcpy(p, q, size); - q_len = strlen(q); - len = (q_len >= size) ? size - 1 : q_len; - if (__builtin_constant_p(size) && __builtin_constant_p(q_len) && size) { - /* Write size is always larger than destination. 
*/ - if (len >= p_size) - __write_overflow(); - } - if (size) { - if (len >= p_size) - fortify_panic(__func__); - __underlying_memcpy(p, q, len); - p[len] = '\0'; - } - return q_len; -} - /* Defined after fortified strnlen() to reuse it. */ extern ssize_t __real_strscpy(char *, const char *, size_t) __RENAME(strscpy); /** @@ -272,12 +227,6 @@ extern ssize_t __real_strscpy(char *, const char *, size_t) __RENAME(strscpy); * @p buffer. The behavior is undefined if the string buffers overlap. The * destination @p buffer is always NUL terminated, unless it's zero-sized. * - * Preferred to strlcpy() since the API doesn't require reading memory - * from the source @q string beyond the specified @size bytes, and since - * the return value is easier to error-check than strlcpy()'s. - * In addition, the implementation is robust to the string changing out - * from underneath it, unlike the current strlcpy() implementation. - * * Preferred to strncpy() since it always returns a valid string, and * doesn't unnecessarily force the tail of the destination buffer to be * zero padded. If padding is desired please use strscpy_pad(). 
diff --git a/include/linux/fs.h b/include/linux/fs.h index e6ba0cc6f2eeeaea1291dbf4e88c8f6af462e96a..ed5966a70495129be1d6729eed2918240db62df1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2371,7 +2371,7 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, #define I_CREATING (1 << 15) #define I_DONTCACHE (1 << 16) #define I_SYNC_QUEUED (1 << 17) -#define I_PINNING_FSCACHE_WB (1 << 18) +#define I_PINNING_NETFS_WB (1 << 18) #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC) #define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES) diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index a174cedf4d9072ae708202693f9647d85dcbe515..bdf7f3eddf0a2fb26b9276f6dacb92228c8e6d29 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -189,17 +189,20 @@ extern atomic_t fscache_n_write; extern atomic_t fscache_n_no_write_space; extern atomic_t fscache_n_no_create_space; extern atomic_t fscache_n_culled; +extern atomic_t fscache_n_dio_misfit; #define fscache_count_read() atomic_inc(&fscache_n_read) #define fscache_count_write() atomic_inc(&fscache_n_write) #define fscache_count_no_write_space() atomic_inc(&fscache_n_no_write_space) #define fscache_count_no_create_space() atomic_inc(&fscache_n_no_create_space) #define fscache_count_culled() atomic_inc(&fscache_n_culled) +#define fscache_count_dio_misfit() atomic_inc(&fscache_n_dio_misfit) #else #define fscache_count_read() do {} while(0) #define fscache_count_write() do {} while(0) #define fscache_count_no_write_space() do {} while(0) #define fscache_count_no_create_space() do {} while(0) #define fscache_count_culled() do {} while(0) +#define fscache_count_dio_misfit() do {} while(0) #endif #endif /* _LINUX_FSCACHE_CACHE_H */ diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 8e312c8323a8e5048d0780401659d2dbe7d90948..6e8562cbcc43221e50cfd2b5698a99b2f7c2cb3f 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ 
-437,9 +437,6 @@ const struct netfs_cache_ops *fscache_operation_valid(const struct netfs_cache_r * indicates the cache resources to which the operation state should be * attached; @cookie indicates the cache object that will be accessed. * - * This is intended to be called from the ->begin_cache_operation() netfs lib - * operation as implemented by the network filesystem. - * * @cres->inval_counter is set from @cookie->inval_counter for comparison at * the end of the operation. This allows invalidation during the operation to * be detected by the caller. @@ -629,48 +626,6 @@ static inline void fscache_write_to_cache(struct fscache_cookie *cookie, } -#if __fscache_available -bool fscache_dirty_folio(struct address_space *mapping, struct folio *folio, - struct fscache_cookie *cookie); -#else -#define fscache_dirty_folio(MAPPING, FOLIO, COOKIE) \ - filemap_dirty_folio(MAPPING, FOLIO) -#endif - -/** - * fscache_unpin_writeback - Unpin writeback resources - * @wbc: The writeback control - * @cookie: The cookie referring to the cache object - * - * Unpin the writeback resources pinned by fscache_dirty_folio(). This is - * intended to be called by the netfs's ->write_inode() method. - */ -static inline void fscache_unpin_writeback(struct writeback_control *wbc, - struct fscache_cookie *cookie) -{ - if (wbc->unpinned_fscache_wb) - fscache_unuse_cookie(cookie, NULL, NULL); -} - -/** - * fscache_clear_inode_writeback - Clear writeback resources pinned by an inode - * @cookie: The cookie referring to the cache object - * @inode: The inode to clean up - * @aux: Auxiliary data to apply to the inode - * - * Clear any writeback resources held by an inode when the inode is evicted. - * This must be called before clear_inode() is called. 
- */ -static inline void fscache_clear_inode_writeback(struct fscache_cookie *cookie, - struct inode *inode, - const void *aux) -{ - if (inode->i_state & I_PINNING_FSCACHE_WB) { - loff_t i_size = i_size_read(inode); - fscache_unuse_cookie(cookie, aux, &i_size); - } -} - /** * fscache_note_page_release - Note that a netfs page got released * @cookie: The cookie corresponding to the file diff --git a/include/linux/fw_table.h b/include/linux/fw_table.h index ca49947f0a775a16df3f086bb490c46b9f6efb17..95421860397a236d101bf4c3bc7a934a515bf847 100644 --- a/include/linux/fw_table.h +++ b/include/linux/fw_table.h @@ -25,16 +25,35 @@ struct acpi_subtable_proc { int count; }; +union fw_table_header { + struct acpi_table_header acpi; + struct acpi_table_cdat cdat; +}; + union acpi_subtable_headers { struct acpi_subtable_header common; struct acpi_hmat_structure hmat; struct acpi_prmt_module_header prmt; struct acpi_cedt_header cedt; + struct acpi_cdat_header cdat; }; int acpi_parse_entries_array(char *id, unsigned long table_size, - struct acpi_table_header *table_header, + union fw_table_header *table_header, struct acpi_subtable_proc *proc, int proc_num, unsigned int max_entries); +int cdat_table_parse(enum acpi_cdat_type type, + acpi_tbl_entry_handler_arg handler_arg, void *arg, + struct acpi_table_cdat *table_header); + +/* CXL is the only non-ACPI consumer of the FIRMWARE_TABLE library */ +#if IS_ENABLED(CONFIG_ACPI) && !IS_ENABLED(CONFIG_CXL_BUS) +#define EXPORT_SYMBOL_FWTBL_LIB(x) EXPORT_SYMBOL_ACPI_LIB(x) +#define __init_or_fwtbl_lib __init_or_acpilib +#else +#define EXPORT_SYMBOL_FWTBL_LIB(x) EXPORT_SYMBOL_NS_GPL(x, CXL) +#define __init_or_fwtbl_lib +#endif + #endif diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index e846bd4e7559bb54bba7ffb2dcda3dc8e0099e16..9a5c6c76e6533385dbb32de98abfd330c8736585 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -635,7 +635,7 @@ struct gpio_device *gpio_device_get(struct 
gpio_device *gdev); void gpio_device_put(struct gpio_device *gdev); DEFINE_FREE(gpio_device_put, struct gpio_device *, - if (IS_ERR_OR_NULL(_T)) gpio_device_put(_T)); + if (!IS_ERR_OR_NULL(_T)) gpio_device_put(_T)) struct device *gpio_device_to_device(struct gpio_device *gdev); diff --git a/include/linux/gpio_keys.h b/include/linux/gpio_keys.h index 3f84aeb81e480b238842cc5c0e18478da854778a..80fa930b04c6795eb7c6143a79655a6f918446eb 100644 --- a/include/linux/gpio_keys.h +++ b/include/linux/gpio_keys.h @@ -21,6 +21,7 @@ struct device; * disable button via sysfs * @value: axis value for %EV_ABS * @irq: Irq number in case of interrupt keys + * @wakeirq: Optional dedicated wake-up interrupt */ struct gpio_keys_button { unsigned int code; @@ -34,6 +35,7 @@ struct gpio_keys_button { bool can_disable; int value; unsigned int irq; + unsigned int wakeirq; }; /** diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 0dae9db275380b16bada4327f8f02e53ff8db30f..652ecb7abedae4b5bc3c451410139168702a63f9 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -23,7 +23,7 @@ #include /* for swab16 */ #include -extern struct bus_type i2c_bus_type; +extern const struct bus_type i2c_bus_type; extern struct device_type i2c_adapter_type; extern struct device_type i2c_client_type; @@ -746,6 +746,8 @@ struct i2c_adapter { struct irq_domain *host_notify_domain; struct regulator *bus_regulator; + + struct dentry *debugfs; }; #define to_i2c_adapter(d) container_of(d, struct i2c_adapter, dev) @@ -850,7 +852,6 @@ static inline void i2c_mark_adapter_resumed(struct i2c_adapter *adap) /* i2c adapter classes (bitmask) */ #define I2C_CLASS_HWMON (1<<0) /* lm_sensors, ... 
*/ -#define I2C_CLASS_DDC (1<<3) /* DDC bus on graphics adapters */ #define I2C_CLASS_SPD (1<<7) /* Memory modules */ /* Warn users that the adapter doesn't support classes anymore */ #define I2C_CLASS_DEPRECATED (1<<8) diff --git a/include/linux/init.h b/include/linux/init.h index 01b52c9c75268f1cdebcfaba9750304d20618002..3fa3f6241350b2a81226a58fc77e2e4d0135e78d 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -179,6 +179,13 @@ extern void (*late_time_init)(void); extern bool initcall_debug; +#ifdef MODULE +extern struct module __this_module; +#define THIS_MODULE (&__this_module) +#else +#define THIS_MODULE ((struct module *)0) +#endif + #endif #ifndef MODULE diff --git a/include/linux/input/as5011.h b/include/linux/input/as5011.h index 5fba52a56cd61e88a319b10a0c6242fd155c3576..5705d5de3aeaeeca919eae7cdc89d617a0cb5075 100644 --- a/include/linux/input/as5011.h +++ b/include/linux/input/as5011.h @@ -7,7 +7,6 @@ */ struct as5011_platform_data { - unsigned int button_gpio; unsigned int axis_irq; /* irq number */ unsigned long axis_irqflags; char xp, xn; /* threshold for x axis */ diff --git a/include/linux/input/navpoint.h b/include/linux/input/navpoint.h index d464ffb4db52b9654074af09264754043a30beab..5192ae3f5ec1b67738fb31d34a34cd97e19f3ab3 100644 --- a/include/linux/input/navpoint.h +++ b/include/linux/input/navpoint.h @@ -5,5 +5,4 @@ struct navpoint_platform_data { int port; /* PXA SSP port for pxa_ssp_request() */ - int gpio; /* GPIO for power on/off */ }; diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 25142a0e2fc2c51d4c7807a1fb87cc21b16a163b..86cf1f7ae389a40180b86dd6850102f6fe04c188 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -100,6 +100,30 @@ struct io_pgtable_cfg { const struct iommu_flush_ops *tlb; struct device *iommu_dev; + /** + * @alloc: Custom page allocator. + * + * Optional hook used to allocate page tables. If this function is NULL, + * @free must be NULL too. 
+ * + * Memory returned should be zeroed and suitable for dma_map_single() and + * virt_to_phys(). + * + * Not all formats support custom page allocators. Before considering + * passing a non-NULL value, make sure the chosen page format supports + * this feature. + */ + void *(*alloc)(void *cookie, size_t size, gfp_t gfp); + + /** + * @free: Custom page de-allocator. + * + * Optional hook used to free page tables allocated with the @alloc + * hook. Must be non-NULL if @alloc is not NULL, must be NULL + * otherwise. + */ + void (*free)(void *cookie, void *pages, size_t size); + /* Low-level data specific to the table format */ union { struct { @@ -241,16 +265,26 @@ io_pgtable_tlb_add_page(struct io_pgtable *iop, iop->cfg.tlb->tlb_add_page(gather, iova, granule, iop->cookie); } +/** + * enum io_pgtable_caps - IO page table backend capabilities. + */ +enum io_pgtable_caps { + /** @IO_PGTABLE_CAP_CUSTOM_ALLOCATOR: Backend accepts custom page table allocators. */ + IO_PGTABLE_CAP_CUSTOM_ALLOCATOR = BIT(0), +}; + /** * struct io_pgtable_init_fns - Alloc/free a set of page tables for a * particular format. * * @alloc: Allocate a set of page tables described by cfg. * @free: Free the page tables associated with iop. + * @caps: Combination of @io_pgtable_caps flags encoding the backend capabilities. 
*/ struct io_pgtable_init_fns { struct io_pgtable *(*alloc)(struct io_pgtable_cfg *cfg, void *cookie); void (*free)(struct io_pgtable *iop); + u32 caps; }; extern struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s1_init_fns; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 6291aa7b079b0df321f06ede5b69180b20941521..1ea2a820e1eb035c9eea2ec97d9874c52bbd0b42 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -106,7 +106,7 @@ struct iommu_domain { unsigned type; const struct iommu_domain_ops *ops; const struct iommu_dirty_ops *dirty_ops; - + const struct iommu_ops *owner; /* Whose domain_alloc we came from */ unsigned long pgsize_bitmap; /* Bitmap of page sizes in use */ struct iommu_domain_geometry geometry; struct iommu_dma_cookie *iova_cookie; @@ -121,6 +121,11 @@ struct iommu_domain { struct { /* IOMMU_DOMAIN_SVA */ struct mm_struct *mm; int users; + /* + * Next iommu_domain in mm->iommu_mm->sva-domains list + * protected by iommu_sva_lock. + */ + struct list_head next; }; }; }; @@ -284,6 +289,23 @@ struct iommu_user_data { size_t len; }; +/** + * struct iommu_user_data_array - iommu driver specific user space data array + * @type: The data type of all the entries in the user buffer array + * @uptr: Pointer to the user buffer array + * @entry_len: The fixed-width length of an entry in the array, in bytes + * @entry_num: The number of total entries in the array + * + * The user buffer includes an array of requests with format defined in + * include/uapi/linux/iommufd.h + */ +struct iommu_user_data_array { + unsigned int type; + void __user *uptr; + size_t entry_len; + u32 entry_num; +}; + /** * __iommu_copy_struct_from_user - Copy iommu driver specific user space data * @dst_data: Pointer to an iommu driver specific user data that is defined in @@ -324,6 +346,57 @@ static inline int __iommu_copy_struct_from_user( sizeof(*kdst), \ offsetofend(typeof(*kdst), min_last)) +/** + * __iommu_copy_struct_from_user_array - Copy iommu driver 
specific user space + * data from an iommu_user_data_array + * @dst_data: Pointer to an iommu driver specific user data that is defined in + * include/uapi/linux/iommufd.h + * @src_array: Pointer to a struct iommu_user_data_array for a user space array + * @data_type: The data type of the @dst_data. Must match with @src_array.type + * @index: Index to the location in the array to copy user data from + * @data_len: Length of current user data structure, i.e. sizeof(struct _dst) + * @min_len: Initial length of user data structure for backward compatibility. + * This should be offsetofend using the last member in the user data + * struct that was initially added to include/uapi/linux/iommufd.h + */ +static inline int __iommu_copy_struct_from_user_array( + void *dst_data, const struct iommu_user_data_array *src_array, + unsigned int data_type, unsigned int index, size_t data_len, + size_t min_len) +{ + struct iommu_user_data src_data; + + if (WARN_ON(!src_array || index >= src_array->entry_num)) + return -EINVAL; + if (!src_array->entry_num) + return -EINVAL; + src_data.uptr = src_array->uptr + src_array->entry_len * index; + src_data.len = src_array->entry_len; + src_data.type = src_array->type; + + return __iommu_copy_struct_from_user(dst_data, &src_data, data_type, + data_len, min_len); +} + +/** + * iommu_copy_struct_from_user_array - Copy iommu driver specific user space + * data from an iommu_user_data_array + * @kdst: Pointer to an iommu driver specific user data that is defined in + * include/uapi/linux/iommufd.h + * @user_array: Pointer to a struct iommu_user_data_array for a user space + * array + * @data_type: The data type of the @kdst. Must match with @user_array->type + * @index: Index to the location in the array to copy user data from + * @min_last: The last member of the data structure @kdst points in the + * initial version. + * Return 0 for success, otherwise -error. 
+ */ +#define iommu_copy_struct_from_user_array(kdst, user_array, data_type, index, \ + min_last) \ + __iommu_copy_struct_from_user_array( \ + kdst, user_array, data_type, index, sizeof(*(kdst)), \ + offsetofend(typeof(*(kdst)), min_last)) + /** * struct iommu_ops - iommu ops and capabilities * @capable: check capability @@ -440,6 +513,13 @@ struct iommu_ops { * @iotlb_sync_map: Sync mappings created recently using @map to the hardware * @iotlb_sync: Flush all queued ranges from the hardware TLBs and empty flush * queue + * @cache_invalidate_user: Flush hardware cache for user space IO page table. + * The @domain must be IOMMU_DOMAIN_NESTED. The @array + * passes in the cache invalidation requests, in form + * of a driver data structure. The driver must update + * array->entry_num to report the number of handled + * invalidation requests. The driver data structure + * must be defined in include/uapi/linux/iommufd.h * @iova_to_phys: translate iova to physical address * @enforce_cache_coherency: Prevent any kind of DMA from bypassing IOMMU_CACHE, * including no-snoop TLPs on PCIe or other platform @@ -465,6 +545,8 @@ struct iommu_domain_ops { size_t size); void (*iotlb_sync)(struct iommu_domain *domain, struct iommu_iotlb_gather *iotlb_gather); + int (*cache_invalidate_user)(struct iommu_domain *domain, + struct iommu_user_data_array *array); phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, dma_addr_t iova); @@ -812,6 +894,11 @@ struct iommu_sva { struct iommu_domain *domain; }; +struct iommu_mm_data { + u32 pasid; + struct list_head sva_domains; +}; + int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, const struct iommu_ops *ops); void iommu_fwspec_free(struct device *dev); @@ -840,10 +927,7 @@ static inline void *dev_iommu_priv_get(struct device *dev) return NULL; } -static inline void dev_iommu_priv_set(struct device *dev, void *priv) -{ - dev->iommu->priv = priv; -} +void dev_iommu_priv_set(struct device *dev, void *priv); 
extern struct mutex iommu_probe_device_lock; int iommu_probe_device(struct device *dev); @@ -1337,15 +1421,33 @@ static inline bool tegra_dev_iommu_get_stream_id(struct device *dev, u32 *stream return false; } -#ifdef CONFIG_IOMMU_SVA +#ifdef CONFIG_IOMMU_MM_DATA static inline void mm_pasid_init(struct mm_struct *mm) { - mm->pasid = IOMMU_PASID_INVALID; + /* + * During dup_mm(), a new mm will be memcpy'd from an old one and that makes + * the new mm and the old one point to a same iommu_mm instance. When either + * one of the two mms gets released, the iommu_mm instance is freed, leaving + * the other mm running into a use-after-free/double-free problem. To avoid + * the problem, zeroing the iommu_mm pointer of a new mm is needed here. + */ + mm->iommu_mm = NULL; } + static inline bool mm_valid_pasid(struct mm_struct *mm) { - return mm->pasid != IOMMU_PASID_INVALID; + return READ_ONCE(mm->iommu_mm); +} + +static inline u32 mm_get_enqcmd_pasid(struct mm_struct *mm) +{ + struct iommu_mm_data *iommu_mm = READ_ONCE(mm->iommu_mm); + + if (!iommu_mm) + return IOMMU_PASID_INVALID; + return iommu_mm->pasid; } + void mm_pasid_drop(struct mm_struct *mm); struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm); @@ -1368,6 +1470,12 @@ static inline u32 iommu_sva_get_pasid(struct iommu_sva *handle) } static inline void mm_pasid_init(struct mm_struct *mm) {} static inline bool mm_valid_pasid(struct mm_struct *mm) { return false; } + +static inline u32 mm_get_enqcmd_pasid(struct mm_struct *mm) +{ + return IOMMU_PASID_INVALID; +} + static inline void mm_pasid_drop(struct mm_struct *mm) {} #endif /* CONFIG_IOMMU_SVA */ diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index 7578d4f6a969a419a8e39e38a8886dc9119c6973..db1249cd9692080f495c8986826d96eaf56b7995 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -47,7 +47,30 @@ static inline int task_nice_ioclass(struct task_struct *task) } #ifdef CONFIG_BLOCK -int 
__get_task_ioprio(struct task_struct *p); +/* + * If the task has set an I/O priority, use that. Otherwise, return + * the default I/O priority. + * + * Expected to be called for current task or with task_lock() held to keep + * io_context stable. + */ +static inline int __get_task_ioprio(struct task_struct *p) +{ + struct io_context *ioc = p->io_context; + int prio; + + if (!ioc) + return IOPRIO_DEFAULT; + + if (p != current) + lockdep_assert_held(&p->alloc_lock); + + prio = ioc->ioprio; + if (IOPRIO_PRIO_CLASS(prio) == IOPRIO_CLASS_NONE) + prio = IOPRIO_PRIO_VALUE(task_nice_ioclass(p), + task_nice_ioprio(p)); + return prio; +} #else static inline int __get_task_ioprio(struct task_struct *p) { diff --git a/include/linux/mc146818rtc.h b/include/linux/mc146818rtc.h index b0da04fe087bb8cb32de8f62a4a52abbaa4d5b80..34dfcc77f505aa39ad6b48f394578d542394dde5 100644 --- a/include/linux/mc146818rtc.h +++ b/include/linux/mc146818rtc.h @@ -126,10 +126,11 @@ struct cmos_rtc_board_info { #endif /* ARCH_RTC_LOCATION */ bool mc146818_does_rtc_work(void); -int mc146818_get_time(struct rtc_time *time); +int mc146818_get_time(struct rtc_time *time, int timeout); int mc146818_set_time(struct rtc_time *time); bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param), + int timeout, void *param); #endif /* _MC146818RTC_H */ diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h index 1e39d27bee418c1a7c0a6a966bbc249056f008fd..69e78190008271a94ebdabb9d944612c58acb7e1 100644 --- a/include/linux/memory-tiers.h +++ b/include/linux/memory-tiers.h @@ -33,7 +33,7 @@ struct memory_dev_type { struct kref kref; }; -struct node_hmem_attrs; +struct access_coordinate; #ifdef CONFIG_NUMA extern bool numa_demotion_enabled; @@ -45,9 +45,9 @@ void clear_node_memory_type(int node, struct memory_dev_type *memtype); int register_mt_adistance_algorithm(struct notifier_block *nb); int unregister_mt_adistance_algorithm(struct notifier_block *nb); int mt_calc_adistance(int 
node, int *adist); -int mt_set_default_dram_perf(int nid, struct node_hmem_attrs *perf, +int mt_set_default_dram_perf(int nid, struct access_coordinate *perf, const char *source); -int mt_perf_to_adistance(struct node_hmem_attrs *perf, int *adist); +int mt_perf_to_adistance(struct access_coordinate *perf, int *adist); #ifdef CONFIG_MIGRATION int next_demotion_node(int node); void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets); @@ -126,13 +126,13 @@ static inline int mt_calc_adistance(int node, int *adist) return NOTIFY_DONE; } -static inline int mt_set_default_dram_perf(int nid, struct node_hmem_attrs *perf, +static inline int mt_set_default_dram_perf(int nid, struct access_coordinate *perf, const char *source) { return -EIO; } -static inline int mt_perf_to_adistance(struct node_hmem_attrs *perf, int *adist) +static inline int mt_perf_to_adistance(struct access_coordinate *perf, int *adist) { return -EIO; } diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 7235f3eaed8bee8ad09a1dac4225960f354862d9..bf5320b28b8bf045f7ab3492eb7f050e027df29d 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1241,7 +1241,8 @@ struct mlx5_ifc_virtio_emulation_cap_bits { u8 reserved_at_c0[0x13]; u8 desc_group_mkey_supported[0x1]; - u8 reserved_at_d4[0xc]; + u8 freeze_to_rdy_supported[0x1]; + u8 reserved_at_d5[0xb]; u8 reserved_at_e0[0x20]; diff --git a/include/linux/mlx5/mlx5_ifc_vdpa.h b/include/linux/mlx5/mlx5_ifc_vdpa.h index b86d51a855f6709762e2da6862e387eaf56a99e1..40371c916cf94d13030c6b6bf83c6118d46f0caa 100644 --- a/include/linux/mlx5/mlx5_ifc_vdpa.h +++ b/include/linux/mlx5/mlx5_ifc_vdpa.h @@ -145,6 +145,10 @@ enum { MLX5_VIRTQ_MODIFY_MASK_STATE = (u64)1 << 0, MLX5_VIRTQ_MODIFY_MASK_DIRTY_BITMAP_PARAMS = (u64)1 << 3, MLX5_VIRTQ_MODIFY_MASK_DIRTY_BITMAP_DUMP_ENABLE = (u64)1 << 4, + MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS = (u64)1 << 6, + MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX = (u64)1 << 7, + 
MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX = (u64)1 << 8, + MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY = (u64)1 << 11, MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY = (u64)1 << 14, }; diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index b2d3a88a34d127733f1e54043d5c08814a8af28d..8b611e13153e68d944126f7cf57bca8a2bc69290 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -730,6 +730,7 @@ struct mm_cid { #endif struct kioctx_table; +struct iommu_mm_data; struct mm_struct { struct { /* @@ -941,8 +942,8 @@ struct mm_struct { #endif struct work_struct async_put_work; -#ifdef CONFIG_IOMMU_SVA - u32 pasid; +#ifdef CONFIG_IOMMU_MM_DATA + struct iommu_mm_data *iommu_mm; #endif #ifdef CONFIG_KSM /* diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index f980edfdd2783ed954c1043324928bccd6fbfa65..743475ca7e9d5132f48ee78c46b485e1018a08b3 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -42,7 +42,7 @@ static inline int nf_bridge_get_physinif(const struct sk_buff *skb) if (!nf_bridge) return 0; - return nf_bridge->physindev ? nf_bridge->physindev->ifindex : 0; + return nf_bridge->physinif; } static inline int nf_bridge_get_physoutif(const struct sk_buff *skb) @@ -56,11 +56,11 @@ static inline int nf_bridge_get_physoutif(const struct sk_buff *skb) } static inline struct net_device * -nf_bridge_get_physindev(const struct sk_buff *skb) +nf_bridge_get_physindev(const struct sk_buff *skb, struct net *net) { const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); - return nf_bridge ? nf_bridge->physindev : NULL; + return nf_bridge ? 
dev_get_by_index_rcu(net, nf_bridge->physinif) : NULL; } static inline struct net_device * diff --git a/include/linux/netfs.h b/include/linux/netfs.h index b11a84f6c32b79ea1efcfa692f3e0326c807f5e2..100cbb261269d1921bff6e616e86223ee9e5512c 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -109,11 +109,18 @@ static inline int wait_on_page_fscache_killable(struct page *page) return folio_wait_private_2_killable(page_folio(page)); } +/* Marks used on xarray-based buffers */ +#define NETFS_BUF_PUT_MARK XA_MARK_0 /* - Page needs putting */ +#define NETFS_BUF_PAGECACHE_MARK XA_MARK_1 /* - Page needs wb/dirty flag wrangling */ + enum netfs_io_source { NETFS_FILL_WITH_ZEROES, NETFS_DOWNLOAD_FROM_SERVER, NETFS_READ_FROM_CACHE, NETFS_INVALID_READ, + NETFS_UPLOAD_TO_SERVER, + NETFS_WRITE_TO_CACHE, + NETFS_INVALID_WRITE, } __mode(byte); typedef void (*netfs_io_terminated_t)(void *priv, ssize_t transferred_or_error, @@ -129,8 +136,56 @@ struct netfs_inode { struct fscache_cookie *cache; #endif loff_t remote_i_size; /* Size of the remote file */ + loff_t zero_point; /* Size after which we assume there's no data + * on the server */ + unsigned long flags; +#define NETFS_ICTX_ODIRECT 0 /* The file has DIO in progress */ +#define NETFS_ICTX_UNBUFFERED 1 /* I/O should not use the pagecache */ +#define NETFS_ICTX_WRITETHROUGH 2 /* Write-through caching */ +#define NETFS_ICTX_NO_WRITE_STREAMING 3 /* Don't engage in write-streaming */ +}; + +/* + * A netfs group - for instance a ceph snap. This is marked on dirty pages and + * pages marked with a group must be flushed before they can be written under + * the domain of another group. + */ +struct netfs_group { + refcount_t ref; + void (*free)(struct netfs_group *netfs_group); }; +/* + * Information about a dirty page (attached only if necessary). + * folio->private + */ +struct netfs_folio { + struct netfs_group *netfs_group; /* Filesystem's grouping marker (or NULL). 
*/ + unsigned int dirty_offset; /* Write-streaming dirty data offset */ + unsigned int dirty_len; /* Write-streaming dirty data length */ +}; +#define NETFS_FOLIO_INFO 0x1UL /* OR'd with folio->private. */ + +static inline struct netfs_folio *netfs_folio_info(struct folio *folio) +{ + void *priv = folio_get_private(folio); + + if ((unsigned long)priv & NETFS_FOLIO_INFO) + return (struct netfs_folio *)((unsigned long)priv & ~NETFS_FOLIO_INFO); + return NULL; +} + +static inline struct netfs_group *netfs_folio_group(struct folio *folio) +{ + struct netfs_folio *finfo; + void *priv = folio_get_private(folio); + + finfo = netfs_folio_info(folio); + if (finfo) + return finfo->netfs_group; + return priv; +} + /* * Resources required to do operations on a cache. */ @@ -143,17 +198,24 @@ struct netfs_cache_resources { }; /* - * Descriptor for a single component subrequest. + * Descriptor for a single component subrequest. Each operation represents an + * individual read/write from/to a server, a cache, a journal, etc.. + * + * The buffer iterator is persistent for the life of the subrequest struct and + * the pages it points to can be relied on to exist for the duration. 
*/ struct netfs_io_subrequest { struct netfs_io_request *rreq; /* Supervising I/O request */ + struct work_struct work; struct list_head rreq_link; /* Link in rreq->subrequests */ + struct iov_iter io_iter; /* Iterator for this subrequest */ loff_t start; /* Where to start the I/O */ size_t len; /* Size of the I/O */ size_t transferred; /* Amount of data transferred */ refcount_t ref; short error; /* 0 or error that occurred */ unsigned short debug_index; /* Index in list (for debugging output) */ + unsigned int max_nr_segs; /* 0 or max number of segments in an iterator */ enum netfs_io_source source; /* Where to read from/write to */ unsigned long flags; #define NETFS_SREQ_COPY_TO_CACHE 0 /* Set if should copy the data to the cache */ @@ -168,6 +230,13 @@ enum netfs_io_origin { NETFS_READAHEAD, /* This read was triggered by readahead */ NETFS_READPAGE, /* This read is a synchronous read */ NETFS_READ_FOR_WRITE, /* This read is to prepare a write */ + NETFS_WRITEBACK, /* This write was triggered by writepages */ + NETFS_WRITETHROUGH, /* This write was made by netfs_perform_write() */ + NETFS_LAUNDER_WRITE, /* This is triggered by ->launder_folio() */ + NETFS_UNBUFFERED_WRITE, /* This is an unbuffered write */ + NETFS_DIO_READ, /* This is a direct I/O read */ + NETFS_DIO_WRITE, /* This is a direct I/O write */ + nr__netfs_io_origin } __mode(byte); /* @@ -175,19 +244,34 @@ enum netfs_io_origin { * operations to a variety of data stores and then stitch the result together. 
*/ struct netfs_io_request { - struct work_struct work; + union { + struct work_struct work; + struct rcu_head rcu; + }; struct inode *inode; /* The file being accessed */ struct address_space *mapping; /* The mapping being accessed */ + struct kiocb *iocb; /* AIO completion vector */ struct netfs_cache_resources cache_resources; + struct list_head proc_link; /* Link in netfs_iorequests */ struct list_head subrequests; /* Contributory I/O operations */ + struct iov_iter iter; /* Unencrypted-side iterator */ + struct iov_iter io_iter; /* I/O (Encrypted-side) iterator */ void *netfs_priv; /* Private data for the netfs */ + struct bio_vec *direct_bv; /* DIO buffer list (when handling iovec-iter) */ + unsigned int direct_bv_count; /* Number of elements in direct_bv[] */ unsigned int debug_id; + unsigned int rsize; /* Maximum read size (0 for none) */ + unsigned int wsize; /* Maximum write size (0 for none) */ + unsigned int subreq_counter; /* Next subreq->debug_index */ atomic_t nr_outstanding; /* Number of ops in progress */ atomic_t nr_copy_ops; /* Number of copy-to-cache ops in progress */ size_t submitted; /* Amount submitted for I/O so far */ size_t len; /* Length of the request */ + size_t upper_len; /* Length can be extended to here */ + size_t transferred; /* Amount to be indicated as transferred */ short error; /* 0 or error that occurred */ enum netfs_io_origin origin; /* Origin of the request */ + bool direct_bv_unpin; /* T if direct_bv[] must be unpinned */ loff_t i_size; /* Size of the file */ loff_t start; /* Start position */ pgoff_t no_unlock_folio; /* Don't unlock this folio after read */ @@ -199,17 +283,25 @@ struct netfs_io_request { #define NETFS_RREQ_DONT_UNLOCK_FOLIOS 3 /* Don't unlock the folios on completion */ #define NETFS_RREQ_FAILED 4 /* The request failed */ #define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes */ +#define NETFS_RREQ_WRITE_TO_CACHE 7 /* Need to write to the cache */ +#define NETFS_RREQ_UPLOAD_TO_SERVER 8 
/* Need to write to the server */ +#define NETFS_RREQ_NONBLOCK 9 /* Don't block if possible (O_NONBLOCK) */ +#define NETFS_RREQ_BLOCKED 10 /* We blocked */ const struct netfs_request_ops *netfs_ops; + void (*cleanup)(struct netfs_io_request *req); }; /* * Operations the network filesystem can/must provide to the helpers. */ struct netfs_request_ops { + unsigned int io_request_size; /* Alloc size for netfs_io_request struct */ + unsigned int io_subrequest_size; /* Alloc size for netfs_io_subrequest struct */ int (*init_request)(struct netfs_io_request *rreq, struct file *file); void (*free_request)(struct netfs_io_request *rreq); - int (*begin_cache_operation)(struct netfs_io_request *rreq); + void (*free_subrequest)(struct netfs_io_subrequest *rreq); + /* Read request handling */ void (*expand_readahead)(struct netfs_io_request *rreq); bool (*clamp_length)(struct netfs_io_subrequest *subreq); void (*issue_read)(struct netfs_io_subrequest *subreq); @@ -217,6 +309,14 @@ struct netfs_request_ops { int (*check_write_begin)(struct file *file, loff_t pos, unsigned len, struct folio **foliop, void **_fsdata); void (*done)(struct netfs_io_request *rreq); + + /* Modification handling */ + void (*update_i_size)(struct inode *inode, loff_t i_size); + + /* Write request handling */ + void (*create_write_requests)(struct netfs_io_request *wreq, + loff_t start, size_t len); + void (*invalidate_cache)(struct netfs_io_request *wreq); }; /* @@ -229,8 +329,7 @@ enum netfs_read_from_hole { }; /* - * Table of operations for access to a cache. This is obtained by - * rreq->ops->begin_cache_operation(). + * Table of operations for access to a cache. */ struct netfs_cache_ops { /* End an operation */ @@ -265,8 +364,8 @@ struct netfs_cache_ops { * actually do. 
*/ int (*prepare_write)(struct netfs_cache_resources *cres, - loff_t *_start, size_t *_len, loff_t i_size, - bool no_space_allocated_yet); + loff_t *_start, size_t *_len, size_t upper_len, + loff_t i_size, bool no_space_allocated_yet); /* Prepare an on-demand read operation, shortening it to a cached/uncached * boundary as appropriate. @@ -284,22 +383,62 @@ struct netfs_cache_ops { loff_t *_data_start, size_t *_data_len); }; +/* High-level read API. */ +ssize_t netfs_unbuffered_read_iter(struct kiocb *iocb, struct iov_iter *iter); +ssize_t netfs_buffered_read_iter(struct kiocb *iocb, struct iov_iter *iter); +ssize_t netfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter); + +/* High-level write API */ +ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, + struct netfs_group *netfs_group); +ssize_t netfs_buffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *from, + struct netfs_group *netfs_group); +ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from); +ssize_t netfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from); + +/* Address operations API */ struct readahead_control; void netfs_readahead(struct readahead_control *); int netfs_read_folio(struct file *, struct folio *); int netfs_write_begin(struct netfs_inode *, struct file *, - struct address_space *, loff_t pos, unsigned int len, - struct folio **, void **fsdata); - + struct address_space *, loff_t pos, unsigned int len, + struct folio **, void **fsdata); +int netfs_writepages(struct address_space *mapping, + struct writeback_control *wbc); +bool netfs_dirty_folio(struct address_space *mapping, struct folio *folio); +int netfs_unpin_writeback(struct inode *inode, struct writeback_control *wbc); +void netfs_clear_inode_writeback(struct inode *inode, const void *aux); +void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length); +bool netfs_release_folio(struct folio *folio, gfp_t gfp); +int netfs_launder_folio(struct 
folio *folio); + +/* VMA operations API. */ +vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group); + +/* (Sub)request management API. */ void netfs_subreq_terminated(struct netfs_io_subrequest *, ssize_t, bool); void netfs_get_subrequest(struct netfs_io_subrequest *subreq, enum netfs_sreq_ref_trace what); void netfs_put_subrequest(struct netfs_io_subrequest *subreq, bool was_async, enum netfs_sreq_ref_trace what); -void netfs_stats_show(struct seq_file *); ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len, struct iov_iter *new, iov_iter_extraction_t extraction_flags); +size_t netfs_limit_iter(const struct iov_iter *iter, size_t start_offset, + size_t max_size, size_t max_segs); +struct netfs_io_subrequest *netfs_create_write_request( + struct netfs_io_request *wreq, enum netfs_io_source dest, + loff_t start, size_t len, work_func_t worker); +void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error, + bool was_async); +void netfs_queue_write_request(struct netfs_io_subrequest *subreq); + +int netfs_start_io_read(struct inode *inode); +void netfs_end_io_read(struct inode *inode); +int netfs_start_io_write(struct inode *inode); +void netfs_end_io_write(struct inode *inode); +int netfs_start_io_direct(struct inode *inode); +void netfs_end_io_direct(struct inode *inode); /** * netfs_inode - Get the netfs inode context from the inode @@ -317,30 +456,44 @@ static inline struct netfs_inode *netfs_inode(struct inode *inode) * netfs_inode_init - Initialise a netfslib inode context * @ctx: The netfs inode to initialise * @ops: The netfs's operations list + * @use_zero_point: True to use the zero_point read optimisation * * Initialise the netfs library context struct. This is expected to follow on * directly from the VFS inode struct. 
*/ static inline void netfs_inode_init(struct netfs_inode *ctx, - const struct netfs_request_ops *ops) + const struct netfs_request_ops *ops, + bool use_zero_point) { ctx->ops = ops; ctx->remote_i_size = i_size_read(&ctx->inode); + ctx->zero_point = LLONG_MAX; + ctx->flags = 0; #if IS_ENABLED(CONFIG_FSCACHE) ctx->cache = NULL; #endif + /* ->releasepage() drives zero_point */ + if (use_zero_point) { + ctx->zero_point = ctx->remote_i_size; + mapping_set_release_always(ctx->inode.i_mapping); + } } /** * netfs_resize_file - Note that a file got resized * @ctx: The netfs inode being resized * @new_i_size: The new file size + * @changed_on_server: The change was applied to the server * * Inform the netfs lib that a file got resized so that it can adjust its state. */ -static inline void netfs_resize_file(struct netfs_inode *ctx, loff_t new_i_size) +static inline void netfs_resize_file(struct netfs_inode *ctx, loff_t new_i_size, + bool changed_on_server) { - ctx->remote_i_size = new_i_size; + if (changed_on_server) + ctx->remote_i_size = new_i_size; + if (new_i_size < ctx->zero_point) + ctx->zero_point = new_i_size; } /** diff --git a/include/linux/node.h b/include/linux/node.h index 427a5975cf405045ded741ed4c1b4b9eaca1ffec..25b66d705ee2ec754021d5ea2e2f1bce15ef7dbe 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -20,14 +20,14 @@ #include /** - * struct node_hmem_attrs - heterogeneous memory performance attributes + * struct access_coordinate - generic performance coordinates container * * @read_bandwidth: Read bandwidth in MB/s * @write_bandwidth: Write bandwidth in MB/s * @read_latency: Read latency in nanoseconds * @write_latency: Write latency in nanoseconds */ -struct node_hmem_attrs { +struct access_coordinate { unsigned int read_bandwidth; unsigned int write_bandwidth; unsigned int read_latency; @@ -65,7 +65,7 @@ struct node_cache_attrs { #ifdef CONFIG_HMEM_REPORTING void node_add_cache(unsigned int nid, struct node_cache_attrs *cache_attrs); -void 
node_set_perf_attrs(unsigned int nid, struct node_hmem_attrs *hmem_attrs, +void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord, unsigned access); #else static inline void node_add_cache(unsigned int nid, @@ -74,7 +74,7 @@ static inline void node_add_cache(unsigned int nid, } static inline void node_set_perf_attrs(unsigned int nid, - struct node_hmem_attrs *hmem_attrs, + struct access_coordinate *coord, unsigned access) { } diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 44325c068b6a01eb81274fa65767dd9298d35643..462c21e0e417654e56edf314ebcb62d8c6f4ad16 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -20,7 +20,6 @@ #define NVMF_TRSVCID_SIZE 32 #define NVMF_TRADDR_SIZE 256 #define NVMF_TSAS_SIZE 256 -#define NVMF_AUTH_HASH_LEN 64 #define NVME_DISC_SUBSYS_NAME "nqn.2014-08.org.nvmexpress.discovery" diff --git a/include/linux/of_device.h b/include/linux/of_device.h index a72661e47faa56a6c316dcd387153275e0990626..9042bca5bb848c5fd4239565aaac679fc951e754 100644 --- a/include/linux/of_device.h +++ b/include/linux/of_device.h @@ -2,10 +2,7 @@ #ifndef _LINUX_OF_DEVICE_H #define _LINUX_OF_DEVICE_H -#include -#include /* temporary until merge */ - -#include +#include struct device; struct of_device_id; diff --git a/include/linux/of_iommu.h b/include/linux/of_iommu.h index 9a5e6b410dd2fb154c40da4139514c0945f6e370..e61cbbe12dac6f40d739be9b77c2bc755fead974 100644 --- a/include/linux/of_iommu.h +++ b/include/linux/of_iommu.h @@ -8,20 +8,19 @@ struct iommu_ops; #ifdef CONFIG_OF_IOMMU -extern const struct iommu_ops *of_iommu_configure(struct device *dev, - struct device_node *master_np, - const u32 *id); +extern int of_iommu_configure(struct device *dev, struct device_node *master_np, + const u32 *id); extern void of_iommu_get_resv_regions(struct device *dev, struct list_head *list); #else -static inline const struct iommu_ops *of_iommu_configure(struct device *dev, - struct device_node *master_np, - const u32 *id) +static inline 
int of_iommu_configure(struct device *dev, + struct device_node *master_np, + const u32 *id) { - return NULL; + return -ENODEV; } static inline void of_iommu_get_resv_regions(struct device *dev, diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h index fadfea5754852df256da1f77461f1d119a3bf418..a2ff1ad48f7f0c19f1e3403a9d2deb8ac860cb85 100644 --- a/include/linux/of_platform.h +++ b/include/linux/of_platform.h @@ -7,11 +7,11 @@ */ #include -#include -#include struct device; +struct device_node; struct of_device_id; +struct platform_device; /** * struct of_dev_auxdata - lookup table entry for device names & platform_data diff --git a/include/linux/pci.h b/include/linux/pci.h index 99a66bc69a39332dce40733e1935c7651595806f..add9368e6314b9d7038a651af3f8e1b9e08d7ffa 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1171,6 +1171,7 @@ int pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge); u8 pci_common_swizzle(struct pci_dev *dev, u8 *pinp); struct pci_dev *pci_dev_get(struct pci_dev *dev); void pci_dev_put(struct pci_dev *dev); +DEFINE_FREE(pci_dev_put, struct pci_dev *, if (_T) pci_dev_put(_T)) void pci_remove_bus(struct pci_bus *b); void pci_stop_and_remove_bus_device(struct pci_dev *dev); void pci_stop_and_remove_bus_device_locked(struct pci_dev *dev); @@ -1367,6 +1368,7 @@ int pcie_set_mps(struct pci_dev *dev, int mps); u32 pcie_bandwidth_available(struct pci_dev *dev, struct pci_dev **limiting_dev, enum pci_bus_speed *speed, enum pcie_link_width *width); +int pcie_link_speed_mbps(struct pci_dev *pdev); void pcie_print_link_status(struct pci_dev *dev); int pcie_reset_flr(struct pci_dev *dev, bool probe); int pcie_flr(struct pci_dev *dev); @@ -1877,6 +1879,7 @@ void pci_cfg_access_unlock(struct pci_dev *dev); void pci_dev_lock(struct pci_dev *dev); int pci_dev_trylock(struct pci_dev *dev); void pci_dev_unlock(struct pci_dev *dev); +DEFINE_GUARD(pci_dev, struct pci_dev *, pci_dev_lock(_T), pci_dev_unlock(_T)) /* * PCI 
domain support. Sometimes called PCI segment (eg by ACPI), diff --git a/include/linux/platform_data/i2c-mux-reg.h b/include/linux/platform_data/i2c-mux-reg.h index 2543c2a1c9aef76717fb539f4c94d2b59e0b7b33..e2e8957683116fd3c3f254639f082d3f0f26b27b 100644 --- a/include/linux/platform_data/i2c-mux-reg.h +++ b/include/linux/platform_data/i2c-mux-reg.h @@ -17,7 +17,6 @@ * @n_values: Number of multiplexer channels * @little_endian: Indicating if the register is in little endian * @write_only: Reading the register is not allowed by hardware - * @classes: Optional I2C auto-detection classes * @idle: Value to write to mux when idle * @idle_in_use: indicate if idle value is in use * @reg: Virtual address of the register to switch channel @@ -30,7 +29,6 @@ struct i2c_mux_reg_platform_data { int n_values; bool little_endian; bool write_only; - const unsigned int *classes; u32 idle; bool idle_in_use; void __iomem *reg; diff --git a/include/linux/platform_data/keypad-omap.h b/include/linux/platform_data/keypad-omap.h index 3e7c64c854f4cd2b8817384a43681bcd8515e7de..f3f1311cdf3aa5ef9440664f38d37c1e94fff485 100644 --- a/include/linux/platform_data/keypad-omap.h +++ b/include/linux/platform_data/keypad-omap.h @@ -19,9 +19,6 @@ struct omap_kp_platform_data { bool rep; unsigned long delay; bool dbounce; - /* specific to OMAP242x*/ - unsigned int *row_gpios; - unsigned int *col_gpios; }; /* Group (0..3) -- when multiple keys are pressed, only the diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h index 7c8d65414a70ad5784badca31fedf2c95d44fc0c..7d8025fb74b701d6ac64cd3add0f7ca297fe5413 100644 --- a/include/linux/power/bq27xxx_battery.h +++ b/include/linux/power/bq27xxx_battery.h @@ -83,5 +83,6 @@ struct bq27xxx_device_info { void bq27xxx_battery_update(struct bq27xxx_device_info *di); int bq27xxx_battery_setup(struct bq27xxx_device_info *di); void bq27xxx_battery_teardown(struct bq27xxx_device_info *di); +extern const struct dev_pm_ops 
bq27xxx_battery_battery_pm_ops; #endif diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 782e14f62201f7c76e911886b3ef263d116a3298..fa802db216f94f8aa7349b02216f536dfa494279 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -141,6 +141,7 @@ int ring_buffer_iter_empty(struct ring_buffer_iter *iter); bool ring_buffer_iter_dropped(struct ring_buffer_iter *iter); unsigned long ring_buffer_size(struct trace_buffer *buffer, int cpu); +unsigned long ring_buffer_max_event_size(struct trace_buffer *buffer); void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu); void ring_buffer_reset_online_cpus(struct trace_buffer *buffer); @@ -191,15 +192,24 @@ bool ring_buffer_time_stamp_abs(struct trace_buffer *buffer); size_t ring_buffer_nr_pages(struct trace_buffer *buffer, int cpu); size_t ring_buffer_nr_dirty_pages(struct trace_buffer *buffer, int cpu); -void *ring_buffer_alloc_read_page(struct trace_buffer *buffer, int cpu); -void ring_buffer_free_read_page(struct trace_buffer *buffer, int cpu, void *data); -int ring_buffer_read_page(struct trace_buffer *buffer, void **data_page, +struct buffer_data_read_page; +struct buffer_data_read_page * +ring_buffer_alloc_read_page(struct trace_buffer *buffer, int cpu); +void ring_buffer_free_read_page(struct trace_buffer *buffer, int cpu, + struct buffer_data_read_page *page); +int ring_buffer_read_page(struct trace_buffer *buffer, + struct buffer_data_read_page *data_page, size_t len, int cpu, int full); +void *ring_buffer_read_page_data(struct buffer_data_read_page *page); struct trace_seq; int ring_buffer_print_entry_header(struct trace_seq *s); -int ring_buffer_print_page_header(struct trace_seq *s); +int ring_buffer_print_page_header(struct trace_buffer *buffer, struct trace_seq *s); + +int ring_buffer_subbuf_order_get(struct trace_buffer *buffer); +int ring_buffer_subbuf_order_set(struct trace_buffer *buffer, int order); +int ring_buffer_subbuf_size_get(struct trace_buffer 
*buffer); enum ring_buffer_flags { RB_FL_OVERWRITE = 1 << 0, diff --git a/include/linux/sched.h b/include/linux/sched.h index c169d0a2a0ee0f2ba818082872757f4450f2f7d4..31fe434a5a659319be447fd04fcb27d63dec0830 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -923,7 +923,7 @@ struct task_struct { unsigned sched_rt_mutex:1; #endif - /* Bit to tell LSMs we're in execve(): */ + /* Bit to tell TOMOYO we're in execve(): */ unsigned in_execve:1; unsigned in_iowait:1; #ifndef TIF_RESTORE_SIGMASK @@ -960,7 +960,7 @@ struct task_struct { /* Recursion prevention for eventfd_signal() */ unsigned in_eventfd:1; #endif -#ifdef CONFIG_IOMMU_SVA +#ifdef CONFIG_ARCH_HAS_CPU_PASID unsigned pasid_activated:1; #endif #ifdef CONFIG_CPU_SUP_INTEL diff --git a/include/linux/seq_buf.h b/include/linux/seq_buf.h index 5fb1f12c33f90232e774fa2ff5988023277c55f4..c44f4b47b945306318d8ed164c498abfe2512a10 100644 --- a/include/linux/seq_buf.h +++ b/include/linux/seq_buf.h @@ -22,9 +22,8 @@ struct seq_buf { }; #define DECLARE_SEQ_BUF(NAME, SIZE) \ - char __ ## NAME ## _buffer[SIZE] = ""; \ struct seq_buf NAME = { \ - .buffer = &__ ## NAME ## _buffer, \ + .buffer = (char[SIZE]) { 0 }, \ .size = SIZE, \ } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a5ae952454c890c4aa95aadf2f3bc79ff782279a..2dde34c29203be8c0ead789ac93fabd23120727f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -295,7 +295,7 @@ struct nf_bridge_info { u8 bridged_dnat:1; u8 sabotage_in_done:1; __u16 frag_max_size; - struct net_device *physindev; + int physinif; /* always valid & non-NULL from FORWARD on, for physdev match */ struct net_device *physoutdev; diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 4f3d14bb15385a2860e4e1ee54738f60eb7c1b06..66f814b63a435f65bcdf8743d7adbfde39ca7ad7 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -886,7 +886,8 @@ struct sdw_master_ops { * struct sdw_bus - SoundWire bus * @dev: 
Shortcut to &bus->md->dev to avoid changing the entire code. * @md: Master device - * @link_id: Link id number, can be 0 to N, unique for each Master + * @controller_id: system-unique controller ID. If set to -1, the bus @id will be used. + * @link_id: Link id number, can be 0 to N, unique for each Controller * @id: bus system-wide unique id * @slaves: list of Slaves on this bus * @assigned: Bitmap for Slave device numbers. @@ -918,6 +919,7 @@ struct sdw_master_ops { struct sdw_bus { struct device *dev; struct sdw_master_device *md; + int controller_id; unsigned int link_id; int id; struct list_head slaves; @@ -1040,7 +1042,7 @@ int sdw_compute_params(struct sdw_bus *bus); int sdw_stream_add_master(struct sdw_bus *bus, struct sdw_stream_config *stream_config, - struct sdw_port_config *port_config, + const struct sdw_port_config *port_config, unsigned int num_ports, struct sdw_stream_runtime *stream); int sdw_stream_remove_master(struct sdw_bus *bus, @@ -1062,7 +1064,7 @@ void sdw_extract_slave_id(struct sdw_bus *bus, u64 addr, struct sdw_slave_id *id int sdw_stream_add_slave(struct sdw_slave *slave, struct sdw_stream_config *stream_config, - struct sdw_port_config *port_config, + const struct sdw_port_config *port_config, unsigned int num_ports, struct sdw_stream_runtime *stream); int sdw_stream_remove_slave(struct sdw_slave *slave, @@ -1084,7 +1086,7 @@ int sdw_update_no_pm(struct sdw_slave *slave, u32 addr, u8 mask, u8 val); static inline int sdw_stream_add_slave(struct sdw_slave *slave, struct sdw_stream_config *stream_config, - struct sdw_port_config *port_config, + const struct sdw_port_config *port_config, unsigned int num_ports, struct sdw_stream_runtime *stream) { diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index eaac8b0da25b8aef964a311eee34d0313c549838..3fcd20de6ca88e83abedf8329a3528aacead6f6d 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -449,6 +449,12 @@ static __always_inline int 
spin_is_contended(spinlock_t *lock) return raw_spin_is_contended(&lock->rlock); } +#define assert_spin_locked(lock) assert_raw_spin_locked(&(lock)->rlock) + +#else /* !CONFIG_PREEMPT_RT */ +# include +#endif /* CONFIG_PREEMPT_RT */ + /* * Does a critical section need to be broken due to another * task waiting?: (technically does not depend on CONFIG_PREEMPTION, @@ -480,12 +486,6 @@ static inline int rwlock_needbreak(rwlock_t *lock) #endif } -#define assert_spin_locked(lock) assert_raw_spin_locked(&(lock)->rlock) - -#else /* !CONFIG_PREEMPT_RT */ -# include -#endif /* CONFIG_PREEMPT_RT */ - /* * Pull the atomic_t declaration: * (asm-mips/atomic.h needs above definitions) diff --git a/include/linux/string.h b/include/linux/string.h index ce137830a0b99c1f79b100b857966443199e9777..ab148d8dbfc146d2aed178b694f33506d06bfd05 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -66,9 +66,6 @@ extern char * strcpy(char *,const char *); #ifndef __HAVE_ARCH_STRNCPY extern char * strncpy(char *,const char *, __kernel_size_t); #endif -#ifndef __HAVE_ARCH_STRLCPY -size_t strlcpy(char *, const char *, size_t); -#endif #ifndef __HAVE_ARCH_STRSCPY ssize_t strscpy(char *, const char *, size_t); #endif diff --git a/include/linux/trace.h b/include/linux/trace.h index 2a70a447184c9e006ac13e3353987990b9ba21c4..fdcd76b7be83d7f11c46878c19d4dab97828a446 100644 --- a/include/linux/trace.h +++ b/include/linux/trace.h @@ -51,7 +51,7 @@ int trace_array_printk(struct trace_array *tr, unsigned long ip, const char *fmt, ...); int trace_array_init_printk(struct trace_array *tr); void trace_array_put(struct trace_array *tr); -struct trace_array *trace_array_get_by_name(const char *name); +struct trace_array *trace_array_get_by_name(const char *name, const char *systems); int trace_array_destroy(struct trace_array *tr); /* For osnoise tracer */ @@ -84,7 +84,7 @@ static inline int trace_array_init_printk(struct trace_array *tr) static inline void trace_array_put(struct trace_array *tr) 
{ } -static inline struct trace_array *trace_array_get_by_name(const char *name) +static inline struct trace_array *trace_array_get_by_name(const char *name, const char *systems) { return NULL; } diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h index 3691e0e76a1a209601f97fe03ca97738dafcd841..9ec229dfddaa774b9c0a4f2ae410eb273061c330 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -8,11 +8,14 @@ /* * Trace sequences are used to allow a function to call several other functions - * to create a string of data to use (up to a max of PAGE_SIZE). + * to create a string of data to use. */ +#define TRACE_SEQ_BUFFER_SIZE (PAGE_SIZE * 2 - \ + (sizeof(struct seq_buf) + sizeof(size_t) + sizeof(int))) + struct trace_seq { - char buffer[PAGE_SIZE]; + char buffer[TRACE_SEQ_BUFFER_SIZE]; struct seq_buf seq; size_t readpos; int full; @@ -21,7 +24,7 @@ struct trace_seq { static inline void trace_seq_init(struct trace_seq *s) { - seq_buf_init(&s->seq, s->buffer, PAGE_SIZE); + seq_buf_init(&s->seq, s->buffer, TRACE_SEQ_BUFFER_SIZE); s->full = 0; s->readpos = 0; } diff --git a/include/linux/vfio.h b/include/linux/vfio.h index a65b2513f8cdcba6b41be132e1f66734b1aeff8b..89b265bc6ec315bcadadebcc92b5ea4ab283822f 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -69,6 +69,13 @@ struct vfio_device { u8 iommufd_attached:1; #endif u8 cdev_opened:1; +#ifdef CONFIG_DEBUG_FS + /* + * debug_root is a static property of the vfio_device + * which must be set prior to registering the vfio_device. 
+ */ + struct dentry *debug_root; +#endif }; /** diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h index 562e8754869da6c6a5c46c2dd55b4775c27b0722..85e84b92751b67354ad6671d945cd39069afcdcc 100644 --- a/include/linux/vfio_pci_core.h +++ b/include/linux/vfio_pci_core.h @@ -127,7 +127,27 @@ int vfio_pci_core_match(struct vfio_device *core_vdev, char *buf); int vfio_pci_core_enable(struct vfio_pci_core_device *vdev); void vfio_pci_core_disable(struct vfio_pci_core_device *vdev); void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev); +int vfio_pci_core_setup_barmap(struct vfio_pci_core_device *vdev, int bar); pci_ers_result_t vfio_pci_core_aer_err_detected(struct pci_dev *pdev, pci_channel_state_t state); +#define VFIO_IOWRITE_DECLATION(size) \ +int vfio_pci_core_iowrite##size(struct vfio_pci_core_device *vdev, \ + bool test_mem, u##size val, void __iomem *io); + +VFIO_IOWRITE_DECLATION(8) +VFIO_IOWRITE_DECLATION(16) +VFIO_IOWRITE_DECLATION(32) +#ifdef iowrite64 +VFIO_IOWRITE_DECLATION(64) +#endif + +#define VFIO_IOREAD_DECLATION(size) \ +int vfio_pci_core_ioread##size(struct vfio_pci_core_device *vdev, \ + bool test_mem, u##size *val, void __iomem *io); + +VFIO_IOREAD_DECLATION(8) +VFIO_IOREAD_DECLATION(16) +VFIO_IOREAD_DECLATION(32) + #endif /* VFIO_PCI_CORE_H */ diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 4cc614a38376593787d7fd2842a7b3649979ead8..b0201747a263a9526c5d60c2c2644a8e064a8439 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -103,6 +103,14 @@ int virtqueue_resize(struct virtqueue *vq, u32 num, int virtqueue_reset(struct virtqueue *vq, void (*recycle)(struct virtqueue *vq, void *buf)); +struct virtio_admin_cmd { + __le16 opcode; + __le16 group_type; + __le64 group_member_id; + struct scatterlist *data_sg; + struct scatterlist *result_sg; +}; + /** * struct virtio_device - representation of a device using virtio * @index: unique position on the virtio bus diff --git 
a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 2b3438de2c4d4e887bdfbf89f5c137f3218b00e9..da9b271b54db8a82fce123befe529c1851bc5b7a 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -93,6 +93,8 @@ typedef void vq_callback_t(struct virtqueue *); * Returns 0 on success or error status * If disable_vq_and_reset is set, then enable_vq_after_reset must also be * set. + * @create_avq: create admin virtqueue resource. + * @destroy_avq: destroy admin virtqueue resource. */ struct virtio_config_ops { void (*get)(struct virtio_device *vdev, unsigned offset, @@ -120,6 +122,8 @@ struct virtio_config_ops { struct virtio_shm_region *region, u8 id); int (*disable_vq_and_reset)(struct virtqueue *vq); int (*enable_vq_after_reset)(struct virtqueue *vq); + int (*create_avq)(struct virtio_device *vdev); + void (*destroy_avq)(struct virtio_device *vdev); }; /* If driver didn't advertise the feature, it will never appear. */ diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 27cc1d4643219a44c01a2404124cd45ef46f7f3d..4dfa9b69ca8d95d43e44831bc166eadbe5715d3c 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -3,6 +3,8 @@ #define _LINUX_VIRTIO_NET_H #include +#include +#include #include #include #include @@ -49,6 +51,7 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, const struct virtio_net_hdr *hdr, bool little_endian) { + unsigned int nh_min_len = sizeof(struct iphdr); unsigned int gso_type = 0; unsigned int thlen = 0; unsigned int p_off = 0; @@ -65,6 +68,7 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, gso_type = SKB_GSO_TCPV6; ip_proto = IPPROTO_TCP; thlen = sizeof(struct tcphdr); + nh_min_len = sizeof(struct ipv6hdr); break; case VIRTIO_NET_HDR_GSO_UDP: gso_type = SKB_GSO_UDP; @@ -100,7 +104,8 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, if (!skb_partial_csum_set(skb, start, off)) return -EINVAL; - p_off = skb_transport_offset(skb) + 
thlen; + nh_min_len = max_t(u32, nh_min_len, skb_transport_offset(skb)); + p_off = nh_min_len + thlen; if (!pskb_may_pull(skb, p_off)) return -EINVAL; } else { @@ -140,7 +145,7 @@ retry: skb_set_transport_header(skb, keys.control.thoff); } else if (gso_type) { - p_off = thlen; + p_off = nh_min_len + thlen; if (!pskb_may_pull(skb, p_off)) return -EINVAL; } diff --git a/include/linux/virtio_pci_admin.h b/include/linux/virtio_pci_admin.h new file mode 100644 index 0000000000000000000000000000000000000000..f4a100a0fe2e10815a549515d4283b7a19e48b92 --- /dev/null +++ b/include/linux/virtio_pci_admin.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_VIRTIO_PCI_ADMIN_H +#define _LINUX_VIRTIO_PCI_ADMIN_H + +#include +#include + +#ifdef CONFIG_VIRTIO_PCI_ADMIN_LEGACY +bool virtio_pci_admin_has_legacy_io(struct pci_dev *pdev); +int virtio_pci_admin_legacy_common_io_write(struct pci_dev *pdev, u8 offset, + u8 size, u8 *buf); +int virtio_pci_admin_legacy_common_io_read(struct pci_dev *pdev, u8 offset, + u8 size, u8 *buf); +int virtio_pci_admin_legacy_device_io_write(struct pci_dev *pdev, u8 offset, + u8 size, u8 *buf); +int virtio_pci_admin_legacy_device_io_read(struct pci_dev *pdev, u8 offset, + u8 size, u8 *buf); +int virtio_pci_admin_legacy_io_notify_info(struct pci_dev *pdev, + u8 req_bar_flags, u8 *bar, + u64 *bar_offset); +#endif + +#endif /* _LINUX_VIRTIO_PCI_ADMIN_H */ diff --git a/include/linux/virtio_pci_modern.h b/include/linux/virtio_pci_modern.h index a09e13a577a99a0f91916ad121329dde11f42b1f..c0b1b1ca1163507dddca0870a46c93194ae07e75 100644 --- a/include/linux/virtio_pci_modern.h +++ b/include/linux/virtio_pci_modern.h @@ -125,4 +125,6 @@ int vp_modern_probe(struct virtio_pci_modern_device *mdev); void vp_modern_remove(struct virtio_pci_modern_device *mdev); int vp_modern_get_queue_reset(struct virtio_pci_modern_device *mdev, u16 index); void vp_modern_set_queue_reset(struct virtio_pci_modern_device *mdev, u16 index); +u16 
vp_modern_avq_num(struct virtio_pci_modern_device *mdev); +u16 vp_modern_avq_index(struct virtio_pci_modern_device *mdev); #endif diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 6d0a14f7019d1e7b76a1931be98ff4f7fa9f0493..453736fd1d23ce673345833cc13593ce13450ba1 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -60,7 +60,7 @@ struct writeback_control { unsigned for_reclaim:1; /* Invoked from the page allocator */ unsigned range_cyclic:1; /* range_start is cyclic */ unsigned for_sync:1; /* sync(2) WB_SYNC_ALL writeback */ - unsigned unpinned_fscache_wb:1; /* Cleared I_PINNING_FSCACHE_WB */ + unsigned unpinned_netfs_wb:1; /* Cleared I_PINNING_NETFS_WB */ /* * When writeback IOs are bounced through async layers, only the diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h index d68b0a4834315062a4b1b450dd87fb66a76218c0..8b8ed4e13d74dfaa63ede54f00c3ba5aac2f6ee8 100644 --- a/include/net/netdev_queues.h +++ b/include/net/netdev_queues.h @@ -128,7 +128,7 @@ netdev_txq_completed_mb(struct netdev_queue *dev_queue, netdev_txq_completed_mb(txq, pkts, bytes); \ \ _res = -1; \ - if (pkts && likely(get_desc > start_thrs)) { \ + if (pkts && likely(get_desc >= start_thrs)) { \ _res = 1; \ if (unlikely(netif_tx_queue_stopped(txq)) && \ !(down_cond)) { \ diff --git a/include/sound/tas2781.h b/include/sound/tas2781.h index 0a86ab8d47b9806955b5e759d59cfa7acc76b49e..b00d65417c310a42a39aec6ce927b85083ace264 100644 --- a/include/sound/tas2781.h +++ b/include/sound/tas2781.h @@ -1,13 +1,13 @@ /* SPDX-License-Identifier: GPL-2.0 */ // -// ALSA SoC Texas Instruments TAS2781 Audio Smart Amplifier +// ALSA SoC Texas Instruments TAS2563/TAS2781 Audio Smart Amplifier // // Copyright (C) 2022 - 2023 Texas Instruments Incorporated // https://www.ti.com // -// The TAS2781 driver implements a flexible and configurable +// The TAS2563/TAS2781 driver implements a flexible and configurable // algo coefficient setting for one, two, or 
even multiple -// TAS2781 chips. +// TAS2563/TAS2781 chips. // // Author: Shenghao Ding // Author: Kevin Lu @@ -60,7 +60,8 @@ #define TASDEVICE_CMD_FIELD_W 0x4 enum audio_device { - TAS2781 = 0, + TAS2563, + TAS2781, }; enum device_catlog_id { diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 5194b7e6dc8d07170e2fcda6079ed4d9b7f1293f..8d73171cb9f0d78672355d4e822176d95aa13cb2 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -902,37 +902,6 @@ TRACE_EVENT(afs_dir_check_failed, __entry->vnode, __entry->off, __entry->i_size) ); -TRACE_EVENT(afs_folio_dirty, - TP_PROTO(struct afs_vnode *vnode, const char *where, struct folio *folio), - - TP_ARGS(vnode, where, folio), - - TP_STRUCT__entry( - __field(struct afs_vnode *, vnode) - __field(const char *, where) - __field(pgoff_t, index) - __field(unsigned long, from) - __field(unsigned long, to) - ), - - TP_fast_assign( - unsigned long priv = (unsigned long)folio_get_private(folio); - __entry->vnode = vnode; - __entry->where = where; - __entry->index = folio_index(folio); - __entry->from = afs_folio_dirty_from(folio, priv); - __entry->to = afs_folio_dirty_to(folio, priv); - __entry->to |= (afs_is_folio_dirty_mmapped(priv) ? - (1UL << (BITS_PER_LONG - 1)) : 0); - ), - - TP_printk("vn=%p %lx %s %lx-%lx%s", - __entry->vnode, __entry->index, __entry->where, - __entry->from, - __entry->to & ~(1UL << (BITS_PER_LONG - 1)), - __entry->to & (1UL << (BITS_PER_LONG - 1)) ? " M" : "") - ); - TRACE_EVENT(afs_call_state, TP_PROTO(struct afs_call *call, enum afs_call_state from, diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index beec534cbaab25e6a2ce4aacaa09134068f7394a..447a8c21cf57df7d30de48efcd27c435fab2c308 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -16,34 +16,57 @@ * Define enums for tracing information. 
*/ #define netfs_read_traces \ + EM(netfs_read_trace_dio_read, "DIO-READ ") \ EM(netfs_read_trace_expanded, "EXPANDED ") \ EM(netfs_read_trace_readahead, "READAHEAD") \ EM(netfs_read_trace_readpage, "READPAGE ") \ + EM(netfs_read_trace_prefetch_for_write, "PREFETCHW") \ E_(netfs_read_trace_write_begin, "WRITEBEGN") +#define netfs_write_traces \ + EM(netfs_write_trace_dio_write, "DIO-WRITE") \ + EM(netfs_write_trace_launder, "LAUNDER ") \ + EM(netfs_write_trace_unbuffered_write, "UNB-WRITE") \ + EM(netfs_write_trace_writeback, "WRITEBACK") \ + E_(netfs_write_trace_writethrough, "WRITETHRU") + #define netfs_rreq_origins \ EM(NETFS_READAHEAD, "RA") \ EM(NETFS_READPAGE, "RP") \ - E_(NETFS_READ_FOR_WRITE, "RW") + EM(NETFS_READ_FOR_WRITE, "RW") \ + EM(NETFS_WRITEBACK, "WB") \ + EM(NETFS_WRITETHROUGH, "WT") \ + EM(NETFS_LAUNDER_WRITE, "LW") \ + EM(NETFS_UNBUFFERED_WRITE, "UW") \ + EM(NETFS_DIO_READ, "DR") \ + E_(NETFS_DIO_WRITE, "DW") #define netfs_rreq_traces \ EM(netfs_rreq_trace_assess, "ASSESS ") \ EM(netfs_rreq_trace_copy, "COPY ") \ EM(netfs_rreq_trace_done, "DONE ") \ EM(netfs_rreq_trace_free, "FREE ") \ + EM(netfs_rreq_trace_redirty, "REDIRTY") \ EM(netfs_rreq_trace_resubmit, "RESUBMT") \ EM(netfs_rreq_trace_unlock, "UNLOCK ") \ - E_(netfs_rreq_trace_unmark, "UNMARK ") + EM(netfs_rreq_trace_unmark, "UNMARK ") \ + EM(netfs_rreq_trace_wait_ip, "WAIT-IP") \ + EM(netfs_rreq_trace_wake_ip, "WAKE-IP") \ + E_(netfs_rreq_trace_write_done, "WR-DONE") #define netfs_sreq_sources \ EM(NETFS_FILL_WITH_ZEROES, "ZERO") \ EM(NETFS_DOWNLOAD_FROM_SERVER, "DOWN") \ EM(NETFS_READ_FROM_CACHE, "READ") \ - E_(NETFS_INVALID_READ, "INVL") \ + EM(NETFS_INVALID_READ, "INVL") \ + EM(NETFS_UPLOAD_TO_SERVER, "UPLD") \ + EM(NETFS_WRITE_TO_CACHE, "WRIT") \ + E_(NETFS_INVALID_WRITE, "INVL") #define netfs_sreq_traces \ EM(netfs_sreq_trace_download_instead, "RDOWN") \ EM(netfs_sreq_trace_free, "FREE ") \ + EM(netfs_sreq_trace_limited, "LIMIT") \ EM(netfs_sreq_trace_prepare, "PREP ") \ 
EM(netfs_sreq_trace_resubmit_short, "SHORT") \ EM(netfs_sreq_trace_submit, "SUBMT") \ @@ -55,19 +78,24 @@ #define netfs_failures \ EM(netfs_fail_check_write_begin, "check-write-begin") \ EM(netfs_fail_copy_to_cache, "copy-to-cache") \ + EM(netfs_fail_dio_read_short, "dio-read-short") \ + EM(netfs_fail_dio_read_zero, "dio-read-zero") \ EM(netfs_fail_read, "read") \ EM(netfs_fail_short_read, "short-read") \ - E_(netfs_fail_prepare_write, "prep-write") + EM(netfs_fail_prepare_write, "prep-write") \ + E_(netfs_fail_write, "write") #define netfs_rreq_ref_traces \ - EM(netfs_rreq_trace_get_hold, "GET HOLD ") \ + EM(netfs_rreq_trace_get_for_outstanding,"GET OUTSTND") \ EM(netfs_rreq_trace_get_subreq, "GET SUBREQ ") \ EM(netfs_rreq_trace_put_complete, "PUT COMPLT ") \ EM(netfs_rreq_trace_put_discard, "PUT DISCARD") \ EM(netfs_rreq_trace_put_failed, "PUT FAILED ") \ - EM(netfs_rreq_trace_put_hold, "PUT HOLD ") \ + EM(netfs_rreq_trace_put_no_submit, "PUT NO-SUBM") \ + EM(netfs_rreq_trace_put_return, "PUT RETURN ") \ EM(netfs_rreq_trace_put_subreq, "PUT SUBREQ ") \ - EM(netfs_rreq_trace_put_zero_len, "PUT ZEROLEN") \ + EM(netfs_rreq_trace_put_work, "PUT WORK ") \ + EM(netfs_rreq_trace_see_work, "SEE WORK ") \ E_(netfs_rreq_trace_new, "NEW ") #define netfs_sreq_ref_traces \ @@ -76,11 +104,44 @@ EM(netfs_sreq_trace_get_short_read, "GET SHORTRD") \ EM(netfs_sreq_trace_new, "NEW ") \ EM(netfs_sreq_trace_put_clear, "PUT CLEAR ") \ + EM(netfs_sreq_trace_put_discard, "PUT DISCARD") \ EM(netfs_sreq_trace_put_failed, "PUT FAILED ") \ EM(netfs_sreq_trace_put_merged, "PUT MERGED ") \ EM(netfs_sreq_trace_put_no_copy, "PUT NO COPY") \ + EM(netfs_sreq_trace_put_wip, "PUT WIP ") \ + EM(netfs_sreq_trace_put_work, "PUT WORK ") \ E_(netfs_sreq_trace_put_terminated, "PUT TERM ") +#define netfs_folio_traces \ + /* The first few correspond to enum netfs_how_to_modify */ \ + EM(netfs_folio_is_uptodate, "mod-uptodate") \ + EM(netfs_just_prefetch, "mod-prefetch") \ + EM(netfs_whole_folio_modify, 
"mod-whole-f") \ + EM(netfs_modify_and_clear, "mod-n-clear") \ + EM(netfs_streaming_write, "mod-streamw") \ + EM(netfs_streaming_write_cont, "mod-streamw+") \ + EM(netfs_flush_content, "flush") \ + EM(netfs_streaming_filled_page, "mod-streamw-f") \ + EM(netfs_streaming_cont_filled_page, "mod-streamw-f+") \ + /* The rest are for writeback */ \ + EM(netfs_folio_trace_clear, "clear") \ + EM(netfs_folio_trace_clear_s, "clear-s") \ + EM(netfs_folio_trace_clear_g, "clear-g") \ + EM(netfs_folio_trace_copy_to_cache, "copy") \ + EM(netfs_folio_trace_end_copy, "end-copy") \ + EM(netfs_folio_trace_filled_gaps, "filled-gaps") \ + EM(netfs_folio_trace_kill, "kill") \ + EM(netfs_folio_trace_launder, "launder") \ + EM(netfs_folio_trace_mkwrite, "mkwrite") \ + EM(netfs_folio_trace_mkwrite_plus, "mkwrite+") \ + EM(netfs_folio_trace_read_gaps, "read-gaps") \ + EM(netfs_folio_trace_redirty, "redirty") \ + EM(netfs_folio_trace_redirtied, "redirtied") \ + EM(netfs_folio_trace_store, "store") \ + EM(netfs_folio_trace_store_plus, "store+") \ + EM(netfs_folio_trace_wthru, "wthru") \ + E_(netfs_folio_trace_wthru_plus, "wthru+") + #ifndef __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY #define __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY @@ -90,11 +151,13 @@ #define E_(a, b) a enum netfs_read_trace { netfs_read_traces } __mode(byte); +enum netfs_write_trace { netfs_write_traces } __mode(byte); enum netfs_rreq_trace { netfs_rreq_traces } __mode(byte); enum netfs_sreq_trace { netfs_sreq_traces } __mode(byte); enum netfs_failure { netfs_failures } __mode(byte); enum netfs_rreq_ref_trace { netfs_rreq_ref_traces } __mode(byte); enum netfs_sreq_ref_trace { netfs_sreq_ref_traces } __mode(byte); +enum netfs_folio_trace { netfs_folio_traces } __mode(byte); #endif @@ -107,6 +170,7 @@ enum netfs_sreq_ref_trace { netfs_sreq_ref_traces } __mode(byte); #define E_(a, b) TRACE_DEFINE_ENUM(a); netfs_read_traces; +netfs_write_traces; netfs_rreq_origins; netfs_rreq_traces; netfs_sreq_sources; @@ -114,6 +178,7 @@ 
netfs_sreq_traces; netfs_failures; netfs_rreq_ref_traces; netfs_sreq_ref_traces; +netfs_folio_traces; /* * Now redefine the EM() and E_() macros to map the enums to the strings that @@ -314,6 +379,82 @@ TRACE_EVENT(netfs_sreq_ref, __entry->ref) ); +TRACE_EVENT(netfs_folio, + TP_PROTO(struct folio *folio, enum netfs_folio_trace why), + + TP_ARGS(folio, why), + + TP_STRUCT__entry( + __field(ino_t, ino) + __field(pgoff_t, index) + __field(unsigned int, nr) + __field(enum netfs_folio_trace, why) + ), + + TP_fast_assign( + __entry->ino = folio->mapping->host->i_ino; + __entry->why = why; + __entry->index = folio_index(folio); + __entry->nr = folio_nr_pages(folio); + ), + + TP_printk("i=%05lx ix=%05lx-%05lx %s", + __entry->ino, __entry->index, __entry->index + __entry->nr - 1, + __print_symbolic(__entry->why, netfs_folio_traces)) + ); + +TRACE_EVENT(netfs_write_iter, + TP_PROTO(const struct kiocb *iocb, const struct iov_iter *from), + + TP_ARGS(iocb, from), + + TP_STRUCT__entry( + __field(unsigned long long, start ) + __field(size_t, len ) + __field(unsigned int, flags ) + ), + + TP_fast_assign( + __entry->start = iocb->ki_pos; + __entry->len = iov_iter_count(from); + __entry->flags = iocb->ki_flags; + ), + + TP_printk("WRITE-ITER s=%llx l=%zx f=%x", + __entry->start, __entry->len, __entry->flags) + ); + +TRACE_EVENT(netfs_write, + TP_PROTO(const struct netfs_io_request *wreq, + enum netfs_write_trace what), + + TP_ARGS(wreq, what), + + TP_STRUCT__entry( + __field(unsigned int, wreq ) + __field(unsigned int, cookie ) + __field(enum netfs_write_trace, what ) + __field(unsigned long long, start ) + __field(size_t, len ) + ), + + TP_fast_assign( + struct netfs_inode *__ctx = netfs_inode(wreq->inode); + struct fscache_cookie *__cookie = netfs_i_cookie(__ctx); + __entry->wreq = wreq->debug_id; + __entry->cookie = __cookie ? 
__cookie->debug_id : 0; + __entry->what = what; + __entry->start = wreq->start; + __entry->len = wreq->len; + ), + + TP_printk("R=%08x %s c=%08x by=%llx-%llx", + __entry->wreq, + __print_symbolic(__entry->what, netfs_write_traces), + __entry->cookie, + __entry->start, __entry->start + __entry->len - 1) + ); + #undef EM #undef E_ #endif /* _TRACE_NETFS_H */ diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index 7c29d82db9ee0dcb5ce770b384149c9734a50f30..f8bc34a6bcfa2f7313f2e9eac38e2df6a25aafca 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -614,6 +614,9 @@ struct btrfs_ioctl_clone_range_args { */ #define BTRFS_DEFRAG_RANGE_COMPRESS 1 #define BTRFS_DEFRAG_RANGE_START_IO 2 +#define BTRFS_DEFRAG_RANGE_FLAGS_SUPP (BTRFS_DEFRAG_RANGE_COMPRESS | \ + BTRFS_DEFRAG_RANGE_START_IO) + struct btrfs_ioctl_defrag_range_args { /* start of the defrag operation */ __u64 start; diff --git a/include/uapi/linux/cxl_mem.h b/include/uapi/linux/cxl_mem.h index 14bc6e7421483105c7ab0fd4e664d354cc3386b1..42066f4eb890362b2c4f150245db5bd1bf399a10 100644 --- a/include/uapi/linux/cxl_mem.h +++ b/include/uapi/linux/cxl_mem.h @@ -46,6 +46,7 @@ ___C(GET_SCAN_MEDIA_CAPS, "Get Scan Media Capabilities"), \ ___DEPRECATED(SCAN_MEDIA, "Scan Media"), \ ___DEPRECATED(GET_SCAN_MEDIA, "Get Scan Media Results"), \ + ___C(GET_TIMESTAMP, "Get Timestamp"), \ ___C(MAX, "invalid / last command") #define ___C(a, b) CXL_MEM_COMMAND_ID_##a diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 0b2bc6252e2ca2840b556ee6dd858ae123f22c9b..1dfeaa2e649ee41751162073463df20cb0a130ad 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -49,6 +49,7 @@ enum { IOMMUFD_CMD_GET_HW_INFO, IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING, IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP, + IOMMUFD_CMD_HWPT_INVALIDATE, }; /** @@ -613,4 +614,82 @@ struct iommu_hwpt_get_dirty_bitmap { #define IOMMU_HWPT_GET_DIRTY_BITMAP _IO(IOMMUFD_TYPE, \ 
IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP) +/** + * enum iommu_hwpt_invalidate_data_type - IOMMU HWPT Cache Invalidation + * Data Type + * @IOMMU_HWPT_INVALIDATE_DATA_VTD_S1: Invalidation data for VTD_S1 + */ +enum iommu_hwpt_invalidate_data_type { + IOMMU_HWPT_INVALIDATE_DATA_VTD_S1, +}; + +/** + * enum iommu_hwpt_vtd_s1_invalidate_flags - Flags for Intel VT-d + * stage-1 cache invalidation + * @IOMMU_VTD_INV_FLAGS_LEAF: Indicates whether the invalidation applies + * to all-levels page structure cache or just + * the leaf PTE cache. + */ +enum iommu_hwpt_vtd_s1_invalidate_flags { + IOMMU_VTD_INV_FLAGS_LEAF = 1 << 0, +}; + +/** + * struct iommu_hwpt_vtd_s1_invalidate - Intel VT-d cache invalidation + * (IOMMU_HWPT_INVALIDATE_DATA_VTD_S1) + * @addr: The start address of the range to be invalidated. It needs to + * be 4KB aligned. + * @npages: Number of contiguous 4K pages to be invalidated. + * @flags: Combination of enum iommu_hwpt_vtd_s1_invalidate_flags + * @__reserved: Must be 0 + * + * The Intel VT-d specific invalidation data for user-managed stage-1 cache + * invalidation in nested translation. Userspace uses this structure to + * tell the impacted cache scope after modifying the stage-1 page table. + * + * Invalidating all the caches related to the page table by setting @addr + * to be 0 and @npages to be U64_MAX. + * + * The device TLB will be invalidated automatically if ATS is enabled. + */ +struct iommu_hwpt_vtd_s1_invalidate { + __aligned_u64 addr; + __aligned_u64 npages; + __u32 flags; + __u32 __reserved; +}; + +/** + * struct iommu_hwpt_invalidate - ioctl(IOMMU_HWPT_INVALIDATE) + * @size: sizeof(struct iommu_hwpt_invalidate) + * @hwpt_id: ID of a nested HWPT for cache invalidation + * @data_uptr: User pointer to an array of driver-specific cache invalidation + * data. + * @data_type: One of enum iommu_hwpt_invalidate_data_type, defining the data + * type of all the entries in the invalidation request array. 
It + * should be a type supported by the hwpt pointed by @hwpt_id. + * @entry_len: Length (in bytes) of a request entry in the request array + * @entry_num: Input the number of cache invalidation requests in the array. + * Output the number of requests successfully handled by kernel. + * @__reserved: Must be 0. + * + * Invalidate the iommu cache for user-managed page table. Modifications on a + * user-managed page table should be followed by this operation to sync cache. + * Each ioctl can support one or more cache invalidation requests in the array + * that has a total size of @entry_len * @entry_num. + * + * An empty invalidation request array by setting @entry_num==0 is allowed, and + * @entry_len and @data_uptr would be ignored in this case. This can be used to + * check if the given @data_type is supported or not by kernel. + */ +struct iommu_hwpt_invalidate { + __u32 size; + __u32 hwpt_id; + __aligned_u64 data_uptr; + __u32 data_type; + __u32 entry_len; + __u32 entry_num; + __u32 __reserved; +}; +#define IOMMU_HWPT_INVALIDATE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_INVALIDATE) #endif diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 7f5fb010226d8cb80a4e435209b3b69ba6e80e35..2b68e6cdf1902f49f8f1cc04ae5b502110a959d3 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -1219,6 +1219,7 @@ enum vfio_device_mig_state { VFIO_DEVICE_STATE_RUNNING_P2P = 5, VFIO_DEVICE_STATE_PRE_COPY = 6, VFIO_DEVICE_STATE_PRE_COPY_P2P = 7, + VFIO_DEVICE_STATE_NR, }; /** diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h index 8881aea60f6f11be1aa2fd20f813883546c8e74a..2445f365bce74b4e926c6929322b269252ab6830 100644 --- a/include/uapi/linux/virtio_config.h +++ b/include/uapi/linux/virtio_config.h @@ -52,7 +52,7 @@ * rest are per-device feature bits. 
*/ #define VIRTIO_TRANSPORT_F_START 28 -#define VIRTIO_TRANSPORT_F_END 41 +#define VIRTIO_TRANSPORT_F_END 42 #ifndef VIRTIO_CONFIG_NO_LEGACY /* Do we get callbacks when the ring is completely used, even if we've @@ -114,4 +114,10 @@ * This feature indicates that the driver can reset a queue individually. */ #define VIRTIO_F_RING_RESET 40 + +/* + * This feature indicates that the device supports administration virtqueues. + */ +#define VIRTIO_F_ADMIN_VQ 41 + #endif /* _UAPI_LINUX_VIRTIO_CONFIG_H */ diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h index 44f4dd2add188090ff3b03d859fb4d27009d5479..ef3810dee7efac5d337d55db03131cb4796f4358 100644 --- a/include/uapi/linux/virtio_pci.h +++ b/include/uapi/linux/virtio_pci.h @@ -175,6 +175,9 @@ struct virtio_pci_modern_common_cfg { __le16 queue_notify_data; /* read-write */ __le16 queue_reset; /* read-write */ + + __le16 admin_queue_index; /* read-only */ + __le16 admin_queue_num; /* read-only */ }; /* Fields in VIRTIO_PCI_CAP_PCI_CFG: */ @@ -215,7 +218,72 @@ struct virtio_pci_cfg_cap { #define VIRTIO_PCI_COMMON_Q_USEDHI 52 #define VIRTIO_PCI_COMMON_Q_NDATA 56 #define VIRTIO_PCI_COMMON_Q_RESET 58 +#define VIRTIO_PCI_COMMON_ADM_Q_IDX 60 +#define VIRTIO_PCI_COMMON_ADM_Q_NUM 62 #endif /* VIRTIO_PCI_NO_MODERN */ +/* Admin command status. */ +#define VIRTIO_ADMIN_STATUS_OK 0 + +/* Admin command opcode. */ +#define VIRTIO_ADMIN_CMD_LIST_QUERY 0x0 +#define VIRTIO_ADMIN_CMD_LIST_USE 0x1 + +/* Admin command group type. */ +#define VIRTIO_ADMIN_GROUP_TYPE_SRIOV 0x1 + +/* Transitional device admin command.
*/ +#define VIRTIO_ADMIN_CMD_LEGACY_COMMON_CFG_WRITE 0x2 +#define VIRTIO_ADMIN_CMD_LEGACY_COMMON_CFG_READ 0x3 +#define VIRTIO_ADMIN_CMD_LEGACY_DEV_CFG_WRITE 0x4 +#define VIRTIO_ADMIN_CMD_LEGACY_DEV_CFG_READ 0x5 +#define VIRTIO_ADMIN_CMD_LEGACY_NOTIFY_INFO 0x6 + +struct __packed virtio_admin_cmd_hdr { + __le16 opcode; + /* + * 1 - SR-IOV + * 2-65535 - reserved + */ + __le16 group_type; + /* Unused, reserved for future extensions. */ + __u8 reserved1[12]; + __le64 group_member_id; +}; + +struct __packed virtio_admin_cmd_status { + __le16 status; + __le16 status_qualifier; + /* Unused, reserved for future extensions. */ + __u8 reserved2[4]; +}; + +struct __packed virtio_admin_cmd_legacy_wr_data { + __u8 offset; /* Starting offset of the register(s) to write. */ + __u8 reserved[7]; + __u8 registers[]; +}; + +struct __packed virtio_admin_cmd_legacy_rd_data { + __u8 offset; /* Starting offset of the register(s) to read. */ +}; + +#define VIRTIO_ADMIN_CMD_NOTIFY_INFO_FLAGS_END 0 +#define VIRTIO_ADMIN_CMD_NOTIFY_INFO_FLAGS_OWNER_DEV 0x1 +#define VIRTIO_ADMIN_CMD_NOTIFY_INFO_FLAGS_OWNER_MEM 0x2 + +#define VIRTIO_ADMIN_CMD_MAX_NOTIFY_INFO 4 + +struct __packed virtio_admin_cmd_notify_info_data { + __u8 flags; /* 0 = end of list, 1 = owner device, 2 = member device */ + __u8 bar; /* BAR of the member or the owner device */ + __u8 padding[6]; + __le64 offset; /* Offset within bar. 
*/ +}; + +struct virtio_admin_cmd_notify_info_result { + struct virtio_admin_cmd_notify_info_data entries[VIRTIO_ADMIN_CMD_MAX_NOTIFY_INFO]; +}; + #endif diff --git a/include/uapi/linux/virtio_pmem.h b/include/uapi/linux/virtio_pmem.h index d676b3620383c3cb76be7b4aa3651b1a99e5ded9..ede4f3564977dcd477e386c51c2a73a11dac43a2 100644 --- a/include/uapi/linux/virtio_pmem.h +++ b/include/uapi/linux/virtio_pmem.h @@ -14,6 +14,13 @@ #include #include +/* Feature bits */ +/* guest physical address range will be indicated as shared memory region 0 */ +#define VIRTIO_PMEM_F_SHMEM_REGION 0 + +/* shmid of the shared memory region corresponding to the pmem */ +#define VIRTIO_PMEM_SHMEM_REGION_ID 0 + struct virtio_pmem_config { __le64 start; __le64 size; diff --git a/init/Kconfig b/init/Kconfig index b8a2f693a5649eb877b2b91ee24bea5a51bb092a..a500490d2954f45cda27968509244bb7a2cbd2ad 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -876,6 +876,18 @@ config CC_NO_ARRAY_BOUNDS bool default y if CC_IS_GCC && GCC_VERSION >= 110000 && GCC11_NO_ARRAY_BOUNDS +# Currently, disable -Wstringop-overflow for GCC 11, globally. +config GCC11_NO_STRINGOP_OVERFLOW + def_bool y + +config CC_NO_STRINGOP_OVERFLOW + bool + default y if CC_IS_GCC && GCC_VERSION >= 110000 && GCC_VERSION < 120000 && GCC11_NO_STRINGOP_OVERFLOW + +config CC_STRINGOP_OVERFLOW + bool + default y if CC_IS_GCC && !CC_NO_STRINGOP_OVERFLOW + # # For architectures that know their GCC __int128 support is sound # diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index aa5fa5059a895c64db808affa77d1182b782953c..5ede6669972cee749fdcabda018880b938704da2 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -137,6 +137,14 @@ struct io_defer_entry { #define IO_DISARM_MASK (REQ_F_ARM_LTIMEOUT | REQ_F_LINK_TIMEOUT | REQ_F_FAIL) #define IO_REQ_LINK_FLAGS (REQ_F_LINK | REQ_F_HARDLINK) +/* + * No waiters. It's larger than any valid value of the tw counter + * so that tests against ->cq_wait_nr would fail and skip wake_up(). 
+ */ +#define IO_CQ_WAKE_INIT (-1U) +/* Forced wake up if there is a waiter regardless of ->cq_wait_nr */ +#define IO_CQ_WAKE_FORCE (IO_CQ_WAKE_INIT >> 1) + static bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx, struct task_struct *task, bool cancel_all); @@ -303,6 +311,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) goto err; ctx->flags = p->flags; + atomic_set(&ctx->cq_wait_nr, IO_CQ_WAKE_INIT); init_waitqueue_head(&ctx->sqo_sq_wait); INIT_LIST_HEAD(&ctx->sqd_list); INIT_LIST_HEAD(&ctx->cq_overflow_list); @@ -1304,16 +1313,23 @@ static inline void io_req_local_work_add(struct io_kiocb *req, unsigned flags) { struct io_ring_ctx *ctx = req->ctx; unsigned nr_wait, nr_tw, nr_tw_prev; - struct llist_node *first; + struct llist_node *head; + + /* See comment above IO_CQ_WAKE_INIT */ + BUILD_BUG_ON(IO_CQ_WAKE_FORCE <= IORING_MAX_CQ_ENTRIES); + /* + * We don't know how many requests there are in the link and whether + * they can even be queued lazily, fall back to non-lazy. + */ if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) flags &= ~IOU_F_TWQ_LAZY_WAKE; - first = READ_ONCE(ctx->work_llist.first); + head = READ_ONCE(ctx->work_llist.first); do { nr_tw_prev = 0; - if (first) { - struct io_kiocb *first_req = container_of(first, + if (head) { + struct io_kiocb *first_req = container_of(head, struct io_kiocb, io_task_work.node); /* @@ -1322,17 +1338,29 @@ static inline void io_req_local_work_add(struct io_kiocb *req, unsigned flags) */ nr_tw_prev = READ_ONCE(first_req->nr_tw); } + + /* + * Theoretically, it can overflow, but that's fine as one of + * previous adds should've tried to wake the task.
+ */ nr_tw = nr_tw_prev + 1; - /* Large enough to fail the nr_wait comparison below */ if (!(flags & IOU_F_TWQ_LAZY_WAKE)) - nr_tw = -1U; + nr_tw = IO_CQ_WAKE_FORCE; req->nr_tw = nr_tw; - req->io_task_work.node.next = first; - } while (!try_cmpxchg(&ctx->work_llist.first, &first, + req->io_task_work.node.next = head; + } while (!try_cmpxchg(&ctx->work_llist.first, &head, &req->io_task_work.node)); - if (!first) { + /* + * cmpxchg implies a full barrier, which pairs with the barrier + * in set_current_state() on the io_cqring_wait() side. It's used + * to ensure that either we see updated ->cq_wait_nr, or waiters + * going to sleep will observe the work added to the list, which + * is similar to the wait/wake task state sync. + */ + + if (!head) { if (ctx->flags & IORING_SETUP_TASKRUN_FLAG) atomic_or(IORING_SQ_TASKRUN, &ctx->rings->sq_flags); if (ctx->has_evfd) @@ -1340,14 +1368,12 @@ static inline void io_req_local_work_add(struct io_kiocb *req, unsigned flags) } nr_wait = atomic_read(&ctx->cq_wait_nr); - /* no one is waiting */ - if (!nr_wait) + /* not enough or no one is waiting */ + if (nr_tw < nr_wait) return; - /* either not enough or the previous add has already woken it up */ - if (nr_wait > nr_tw || nr_tw_prev >= nr_wait) + /* the previous add has already woken it up */ + if (nr_tw_prev >= nr_wait) return; - /* pairs with set_current_state() in io_cqring_wait() */ - smp_mb__after_atomic(); wake_up_state(ctx->submitter_task, TASK_INTERRUPTIBLE); } @@ -2035,9 +2061,10 @@ inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd, goto out; fd = array_index_nospec(fd, ctx->nr_user_files); slot = io_fixed_file_slot(&ctx->file_table, fd); - file = io_slot_file(slot); + if (!req->rsrc_node) + __io_req_set_rsrc_node(req, ctx); req->flags |= io_slot_flags(slot); - io_req_set_rsrc_node(req, ctx, 0); + file = io_slot_file(slot); out: io_ring_submit_unlock(ctx, issue_flags); return file; @@ -2648,7 +2675,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx,
int min_events, ret = io_cqring_wait_schedule(ctx, &iowq); __set_current_state(TASK_RUNNING); - atomic_set(&ctx->cq_wait_nr, 0); + atomic_set(&ctx->cq_wait_nr, IO_CQ_WAKE_INIT); /* * Run task_work after scheduling and before io_should_wake(). diff --git a/io_uring/register.c b/io_uring/register.c index 708dd1d89add4ab09ac4dd90a4a3bbaba410ea4d..5e62c1208996542537c6aedf4d57506863165e10 100644 --- a/io_uring/register.c +++ b/io_uring/register.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -278,13 +279,14 @@ static __cold int io_register_iowq_aff(struct io_ring_ctx *ctx, if (len > cpumask_size()) len = cpumask_size(); - if (in_compat_syscall()) { +#ifdef CONFIG_COMPAT + if (in_compat_syscall()) ret = compat_get_bitmap(cpumask_bits(new_mask), (const compat_ulong_t __user *)arg, len * 8 /* CHAR_BIT */); - } else { + else +#endif ret = copy_from_user(new_mask, arg, len); - } if (ret) { free_cpumask_var(new_mask); diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h index 7238b9cfe33b60b7520905d7f632951212c9677c..c6f199bbee2843dfea2d88729b707d46a168d3d9 100644 --- a/io_uring/rsrc.h +++ b/io_uring/rsrc.h @@ -102,17 +102,21 @@ static inline void io_charge_rsrc_node(struct io_ring_ctx *ctx, node->refs++; } +static inline void __io_req_set_rsrc_node(struct io_kiocb *req, + struct io_ring_ctx *ctx) +{ + lockdep_assert_held(&ctx->uring_lock); + req->rsrc_node = ctx->rsrc_node; + io_charge_rsrc_node(ctx, ctx->rsrc_node); +} + static inline void io_req_set_rsrc_node(struct io_kiocb *req, struct io_ring_ctx *ctx, unsigned int issue_flags) { if (!req->rsrc_node) { io_ring_submit_lock(ctx, issue_flags); - - lockdep_assert_held(&ctx->uring_lock); - - req->rsrc_node = ctx->rsrc_node; - io_charge_rsrc_node(ctx, ctx->rsrc_node); + __io_req_set_rsrc_node(req, ctx); io_ring_submit_unlock(ctx, issue_flags); } } diff --git a/io_uring/rw.c b/io_uring/rw.c index 0c856726b15db330daf6470d3e8baea4d90ad17b..118cc9f1cf1602a4859eb3359c8b2e64cf6db620 100644 --- 
a/io_uring/rw.c +++ b/io_uring/rw.c @@ -168,27 +168,6 @@ void io_readv_writev_cleanup(struct io_kiocb *req) kfree(io->free_iovec); } -static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret) -{ - switch (ret) { - case -EIOCBQUEUED: - break; - case -ERESTARTSYS: - case -ERESTARTNOINTR: - case -ERESTARTNOHAND: - case -ERESTART_RESTARTBLOCK: - /* - * We can't just restart the syscall, since previously - * submitted sqes may already be in progress. Just fail this - * IO with EINTR. - */ - ret = -EINTR; - fallthrough; - default: - kiocb->ki_complete(kiocb, ret); - } -} - static inline loff_t *io_kiocb_update_pos(struct io_kiocb *req) { struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); @@ -371,6 +350,33 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res) smp_store_release(&req->iopoll_completed, 1); } +static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret) +{ + /* IO was queued async, completion will happen later */ + if (ret == -EIOCBQUEUED) + return; + + /* transform internal restart error codes */ + if (unlikely(ret < 0)) { + switch (ret) { + case -ERESTARTSYS: + case -ERESTARTNOINTR: + case -ERESTARTNOHAND: + case -ERESTART_RESTARTBLOCK: + /* + * We can't just restart the syscall, since previously + * submitted sqes may already be in progress. Just fail + * this IO with EINTR. 
+ */ + ret = -EINTR; + break; + } + } + + INDIRECT_CALL_2(kiocb->ki_complete, io_complete_rw_iopoll, + io_complete_rw, kiocb, ret); +} + static int kiocb_done(struct io_kiocb *req, ssize_t ret, unsigned int issue_flags) { diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 51e8b4bee0c8323b42cdee3c29cdaca00bbb95aa..59647118917676c7aa3c69a20cffd0946ff7a2fb 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -5615,21 +5615,46 @@ static u8 bpf_ctx_convert_map[] = { #undef BPF_MAP_TYPE #undef BPF_LINK_TYPE -const struct btf_member * -btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, - const struct btf_type *t, enum bpf_prog_type prog_type, - int arg) +static const struct btf_type *find_canonical_prog_ctx_type(enum bpf_prog_type prog_type) { const struct btf_type *conv_struct; - const struct btf_type *ctx_struct; const struct btf_member *ctx_type; - const char *tname, *ctx_tname; conv_struct = bpf_ctx_convert.t; - if (!conv_struct) { - bpf_log(log, "btf_vmlinux is malformed\n"); + if (!conv_struct) return NULL; - } + /* prog_type is valid bpf program type. No need for bounds check. */ + ctx_type = btf_type_member(conv_struct) + bpf_ctx_convert_map[prog_type] * 2; + /* ctx_type is a pointer to prog_ctx_type in vmlinux. + * Like 'struct __sk_buff' + */ + return btf_type_by_id(btf_vmlinux, ctx_type->type); +} + +static int find_kern_ctx_type_id(enum bpf_prog_type prog_type) +{ + const struct btf_type *conv_struct; + const struct btf_member *ctx_type; + + conv_struct = bpf_ctx_convert.t; + if (!conv_struct) + return -EFAULT; + /* prog_type is valid bpf program type. No need for bounds check. */ + ctx_type = btf_type_member(conv_struct) + bpf_ctx_convert_map[prog_type] * 2 + 1; + /* ctx_type is a pointer to prog_ctx_type in vmlinux. 
+ * Like 'struct sk_buff' + */ + return ctx_type->type; +} + +const struct btf_type * +btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, + const struct btf_type *t, enum bpf_prog_type prog_type, + int arg) +{ + const struct btf_type *ctx_type; + const char *tname, *ctx_tname; + t = btf_type_by_id(btf, t->type); while (btf_type_is_modifier(t)) t = btf_type_by_id(btf, t->type); @@ -5646,17 +5671,15 @@ btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, bpf_log(log, "arg#%d struct doesn't have a name\n", arg); return NULL; } - /* prog_type is valid bpf program type. No need for bounds check. */ - ctx_type = btf_type_member(conv_struct) + bpf_ctx_convert_map[prog_type] * 2; - /* ctx_struct is a pointer to prog_ctx_type in vmlinux. - * Like 'struct __sk_buff' - */ - ctx_struct = btf_type_by_id(btf_vmlinux, ctx_type->type); - if (!ctx_struct) + + ctx_type = find_canonical_prog_ctx_type(prog_type); + if (!ctx_type) { + bpf_log(log, "btf_vmlinux is malformed\n"); /* should not happen */ return NULL; + } again: - ctx_tname = btf_name_by_offset(btf_vmlinux, ctx_struct->name_off); + ctx_tname = btf_name_by_offset(btf_vmlinux, ctx_type->name_off); if (!ctx_tname) { /* should not happen */ bpf_log(log, "Please fix kernel include/linux/bpf_types.h\n"); @@ -5677,28 +5700,167 @@ again: /* bpf_user_pt_regs_t is a typedef, so resolve it to * underlying struct and check name again */ - if (!btf_type_is_modifier(ctx_struct)) + if (!btf_type_is_modifier(ctx_type)) return NULL; - while (btf_type_is_modifier(ctx_struct)) - ctx_struct = btf_type_by_id(btf_vmlinux, ctx_struct->type); + while (btf_type_is_modifier(ctx_type)) + ctx_type = btf_type_by_id(btf_vmlinux, ctx_type->type); goto again; } return ctx_type; } +/* forward declarations for arch-specific underlying types of + * bpf_user_pt_regs_t; this avoids the need for arch-specific #ifdef + * compilation guards below for BPF_PROG_TYPE_PERF_EVENT checks, but still + * works correctly with 
__builtin_types_compatible_p() on respective + * architectures + */ +struct user_regs_struct; +struct user_pt_regs; + +static int btf_validate_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, + const struct btf_type *t, int arg, + enum bpf_prog_type prog_type, + enum bpf_attach_type attach_type) +{ + const struct btf_type *ctx_type; + const char *tname, *ctx_tname; + + if (!btf_is_ptr(t)) { + bpf_log(log, "arg#%d type isn't a pointer\n", arg); + return -EINVAL; + } + t = btf_type_by_id(btf, t->type); + + /* KPROBE and PERF_EVENT programs allow bpf_user_pt_regs_t typedef */ + if (prog_type == BPF_PROG_TYPE_KPROBE || prog_type == BPF_PROG_TYPE_PERF_EVENT) { + while (btf_type_is_modifier(t) && !btf_type_is_typedef(t)) + t = btf_type_by_id(btf, t->type); + + if (btf_type_is_typedef(t)) { + tname = btf_name_by_offset(btf, t->name_off); + if (tname && strcmp(tname, "bpf_user_pt_regs_t") == 0) + return 0; + } + } + + /* all other program types don't use typedefs for context type */ + while (btf_type_is_modifier(t)) + t = btf_type_by_id(btf, t->type); + + /* `void *ctx __arg_ctx` is always valid */ + if (btf_type_is_void(t)) + return 0; + + tname = btf_name_by_offset(btf, t->name_off); + if (str_is_empty(tname)) { + bpf_log(log, "arg#%d type doesn't have a name\n", arg); + return -EINVAL; + } + + /* special cases */ + switch (prog_type) { + case BPF_PROG_TYPE_KPROBE: + if (__btf_type_is_struct(t) && strcmp(tname, "pt_regs") == 0) + return 0; + break; + case BPF_PROG_TYPE_PERF_EVENT: + if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct pt_regs) && + __btf_type_is_struct(t) && strcmp(tname, "pt_regs") == 0) + return 0; + if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_pt_regs) && + __btf_type_is_struct(t) && strcmp(tname, "user_pt_regs") == 0) + return 0; + if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_regs_struct) && + __btf_type_is_struct(t) && strcmp(tname, "user_regs_struct") == 0) + return 0; + break; + case 
BPF_PROG_TYPE_RAW_TRACEPOINT: + case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: + /* allow u64* as ctx */ + if (btf_is_int(t) && t->size == 8) + return 0; + break; + case BPF_PROG_TYPE_TRACING: + switch (attach_type) { + case BPF_TRACE_RAW_TP: + /* tp_btf program is TRACING, so need special case here */ + if (__btf_type_is_struct(t) && + strcmp(tname, "bpf_raw_tracepoint_args") == 0) + return 0; + /* allow u64* as ctx */ + if (btf_is_int(t) && t->size == 8) + return 0; + break; + case BPF_TRACE_ITER: + /* allow struct bpf_iter__xxx types only */ + if (__btf_type_is_struct(t) && + strncmp(tname, "bpf_iter__", sizeof("bpf_iter__") - 1) == 0) + return 0; + break; + case BPF_TRACE_FENTRY: + case BPF_TRACE_FEXIT: + case BPF_MODIFY_RETURN: + /* allow u64* as ctx */ + if (btf_is_int(t) && t->size == 8) + return 0; + break; + default: + break; + } + break; + case BPF_PROG_TYPE_LSM: + case BPF_PROG_TYPE_STRUCT_OPS: + /* allow u64* as ctx */ + if (btf_is_int(t) && t->size == 8) + return 0; + break; + case BPF_PROG_TYPE_TRACEPOINT: + case BPF_PROG_TYPE_SYSCALL: + case BPF_PROG_TYPE_EXT: + return 0; /* anything goes */ + default: + break; + } + + ctx_type = find_canonical_prog_ctx_type(prog_type); + if (!ctx_type) { + /* should not happen */ + bpf_log(log, "btf_vmlinux is malformed\n"); + return -EINVAL; + } + + /* resolve typedefs and check that underlying structs are matching as well */ + while (btf_type_is_modifier(ctx_type)) + ctx_type = btf_type_by_id(btf_vmlinux, ctx_type->type); + + /* if program type doesn't have distinctly named struct type for + * context, then __arg_ctx argument can only be `void *`, which we + * already checked above + */ + if (!__btf_type_is_struct(ctx_type)) { + bpf_log(log, "arg#%d should be void pointer\n", arg); + return -EINVAL; + } + + ctx_tname = btf_name_by_offset(btf_vmlinux, ctx_type->name_off); + if (!__btf_type_is_struct(t) || strcmp(ctx_tname, tname) != 0) { + bpf_log(log, "arg#%d should be `struct %s *`\n", arg, ctx_tname); + return 
-EINVAL; + } + + return 0; +} + static int btf_translate_to_vmlinux(struct bpf_verifier_log *log, struct btf *btf, const struct btf_type *t, enum bpf_prog_type prog_type, int arg) { - const struct btf_member *prog_ctx_type, *kern_ctx_type; - - prog_ctx_type = btf_get_prog_ctx_type(log, btf, t, prog_type, arg); - if (!prog_ctx_type) + if (!btf_get_prog_ctx_type(log, btf, t, prog_type, arg)) return -ENOENT; - kern_ctx_type = prog_ctx_type + 1; - return kern_ctx_type->type; + return find_kern_ctx_type_id(prog_type); } int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_type) @@ -6934,6 +7096,23 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog) return -EINVAL; } + for (i = 0; i < nargs; i++) { + const char *tag; + + if (sub->args[i].arg_type != ARG_PTR_TO_CTX) + continue; + + /* check if arg has "arg:ctx" tag */ + t = btf_type_by_id(btf, args[i].type); + tag = btf_find_decl_tag_value(btf, fn_t, i, "arg:"); + if (IS_ERR_OR_NULL(tag) || strcmp(tag, "ctx") != 0) + continue; + + if (btf_validate_prog_ctx_type(log, btf, t, i, prog_type, + prog->expected_attach_type)) + return -EINVAL; + } + sub->arg_cnt = nargs; sub->args_cached = true; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index adbf330d364bba994488c0d835f08f8cffa67f63..65f598694d550359f2b926ef26ae30d0c80c6f69 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -12826,6 +12826,10 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, } switch (base_type(ptr_reg->type)) { + case PTR_TO_FLOW_KEYS: + if (known) + break; + fallthrough; case CONST_PTR_TO_MAP: /* smin_val represents the known value */ if (known && smin_val == 0 && opcode == BPF_ADD) diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 6b213c8252d62df7a30dd2b0f861e6bb1895be36..d05066cb40b2ee504d780b5f9ea5d16e17bcbbd1 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -1348,8 +1348,6 @@ do_full_getstr: /* PROMPT can 
only be set if we have MEM_READ permission. */ snprintf(kdb_prompt_str, CMD_BUFLEN, kdbgetenv("PROMPT"), raw_smp_processor_id()); - if (defcmd_in_progress) - strncat(kdb_prompt_str, "[defcmd]", CMD_BUFLEN); /* * Fetch command from keyboard diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index 1dd9670a99a9eea0a7821ad4df8a0346ce531b85..cbff300f95bd2a040ba545ef3ee23a1f386bfc31 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -62,7 +62,8 @@ enum map_err_types { * @pfn: page frame of the start address * @offset: offset of mapping relative to pfn * @map_err_type: track whether dma_mapping_error() was checked - * @stacktrace: support backtraces when a violation is detected + * @stack_len: number of backtrace entries in @stack_entries + * @stack_entries: stack of backtrace history */ struct dma_debug_entry { struct list_head list; diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 97c298b210bc71d75eac5ce0dee78ae39f44a746..b079a9a8e08795b40fb0cc3891a68bcb90922876 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -1136,6 +1136,9 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, int cpu, i; int index; + if (alloc_size > IO_TLB_SEGSIZE * IO_TLB_SIZE) + return -1; + cpu = raw_smp_processor_id(); for (i = 0; i < default_nareas; ++i) { index = swiotlb_search_area(dev, cpu, i, orig_addr, alloc_size, diff --git a/kernel/fork.c b/kernel/fork.c index e3e6d7353222dc3b056330da88f29886e5f6d6fc..94db30844cbe049f402ba76b191d5a80bb7f1185 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1181,7 +1181,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) tsk->use_memdelay = 0; #endif -#ifdef CONFIG_IOMMU_SVA +#ifdef CONFIG_ARCH_HAS_CPU_PASID tsk->pasid_activated = 0; #endif @@ -1759,6 +1759,7 @@ static int copy_fs(unsigned long clone_flags, struct task_struct *tsk) if (clone_flags & CLONE_FS) { /* tsk->fs is already what we want */ spin_lock(&fs->lock); + /* "users" and "in_exec" locked for 
check_unsafe_exec() */ if (fs->in_exec) { spin_unlock(&fs->lock); return -EAGAIN; diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 1ae8517778066284be5c7c15111b09a0f726f164..b2bccfd37c383d04692fb6a7a72eb71a1f62798b 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1013,6 +1013,38 @@ static bool rcu_future_gp_cleanup(struct rcu_node *rnp) return needmore; } +static void swake_up_one_online_ipi(void *arg) +{ + struct swait_queue_head *wqh = arg; + + swake_up_one(wqh); +} + +static void swake_up_one_online(struct swait_queue_head *wqh) +{ + int cpu = get_cpu(); + + /* + * If called from rcutree_report_cpu_starting(), wake up + * is dangerous that late in the CPU-down hotplug process. The + * scheduler might queue an ignored hrtimer. Defer the wake up + * to an online CPU instead. + */ + if (unlikely(cpu_is_offline(cpu))) { + int target; + + target = cpumask_any_and(housekeeping_cpumask(HK_TYPE_RCU), + cpu_online_mask); + + smp_call_function_single(target, swake_up_one_online_ipi, + wqh, 0); + put_cpu(); + } else { + put_cpu(); + swake_up_one(wqh); + } +} + /* * Awaken the grace-period kthread. 
Don't do a self-awaken (unless in an * interrupt or softirq handler, in which case we just might immediately @@ -1037,7 +1069,7 @@ static void rcu_gp_kthread_wake(void) return; WRITE_ONCE(rcu_state.gp_wake_time, jiffies); WRITE_ONCE(rcu_state.gp_wake_seq, READ_ONCE(rcu_state.gp_seq)); - swake_up_one(&rcu_state.gp_wq); + swake_up_one_online(&rcu_state.gp_wq); } /* diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 6d7cea5d591f95d823b63972da899dded9e369d1..2ac440bc7e10bc8e1248eae47a661eb017768cee 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -173,7 +173,6 @@ static bool sync_rcu_exp_done_unlocked(struct rcu_node *rnp) return ret; } - /* * Report the exit from RCU read-side critical section for the last task * that queued itself during or before the current expedited preemptible-RCU @@ -201,7 +200,7 @@ static void __rcu_report_exp_rnp(struct rcu_node *rnp, raw_spin_unlock_irqrestore_rcu_node(rnp, flags); if (wake) { smp_mb(); /* EGP done before wake_up(). */ - swake_up_one(&rcu_state.expedited_wq); + swake_up_one_online(&rcu_state.expedited_wq); } break; } diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 95c3c097083e505f41931168cef46f84cf4fac52..eece6244f9d2fea301f5523ef1d7d6779f0d4625 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -133,7 +133,11 @@ unsigned long get_capacity_ref_freq(struct cpufreq_policy *policy) if (arch_scale_freq_invariant()) return policy->cpuinfo.max_freq; - return policy->cur; + /* + * Apply a 25% margin so that we select a higher frequency than + * the current one before the CPU is fully busy: + */ + return policy->cur + (policy->cur >> 2); } /** diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index b99f9d6c525ca3d787509552eb7cb38b7aef414e..a8d91cd9e101d5b0bd332187ef166f1cb689a1ee 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -1581,13 +1581,18 @@ void tick_setup_sched_timer(void) void 
tick_cancel_sched_timer(int cpu) { struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); + ktime_t idle_sleeptime, iowait_sleeptime; # ifdef CONFIG_HIGH_RES_TIMERS if (ts->sched_timer.base) hrtimer_cancel(&ts->sched_timer); # endif + idle_sleeptime = ts->idle_sleeptime; + iowait_sleeptime = ts->iowait_sleeptime; memset(ts, 0, sizeof(*ts)); + ts->idle_sleeptime = idle_sleeptime; + ts->iowait_sleeptime = iowait_sleeptime; } #endif diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 9286f88fcd32ac329bdd9b113d2bcb995de7f39b..13aaf5e85b811b72f60b355f833a680342a41c7f 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -27,6 +27,7 @@ #include #include +#include #include /* @@ -317,6 +318,11 @@ struct buffer_data_page { unsigned char data[] RB_ALIGN_DATA; /* data of buffer page */ }; +struct buffer_data_read_page { + unsigned order; /* order of the page */ + struct buffer_data_page *data; /* actual data, stored in this page */ +}; + /* * Note, the buffer_page list must be first. 
The buffer pages * are allocated in cache lines, which means that each buffer @@ -331,6 +337,7 @@ struct buffer_page { unsigned read; /* index for next read */ local_t entries; /* entries on this page */ unsigned long real_end; /* real end of data */ + unsigned order; /* order of the page */ struct buffer_data_page *page; /* Actual data page */ }; @@ -361,7 +368,7 @@ static __always_inline unsigned int rb_page_commit(struct buffer_page *bpage) static void free_buffer_page(struct buffer_page *bpage) { - free_page((unsigned long)bpage->page); + free_pages((unsigned long)bpage->page, bpage->order); kfree(bpage); } @@ -373,41 +380,6 @@ static inline bool test_time_stamp(u64 delta) return !!(delta & TS_DELTA_TEST); } -#define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE) - -/* Max payload is BUF_PAGE_SIZE - header (8bytes) */ -#define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2)) - -int ring_buffer_print_page_header(struct trace_seq *s) -{ - struct buffer_data_page field; - - trace_seq_printf(s, "\tfield: u64 timestamp;\t" - "offset:0;\tsize:%u;\tsigned:%u;\n", - (unsigned int)sizeof(field.time_stamp), - (unsigned int)is_signed_type(u64)); - - trace_seq_printf(s, "\tfield: local_t commit;\t" - "offset:%u;\tsize:%u;\tsigned:%u;\n", - (unsigned int)offsetof(typeof(field), commit), - (unsigned int)sizeof(field.commit), - (unsigned int)is_signed_type(long)); - - trace_seq_printf(s, "\tfield: int overwrite;\t" - "offset:%u;\tsize:%u;\tsigned:%u;\n", - (unsigned int)offsetof(typeof(field), commit), - 1, - (unsigned int)is_signed_type(long)); - - trace_seq_printf(s, "\tfield: char data;\t" - "offset:%u;\tsize:%u;\tsigned:%u;\n", - (unsigned int)offsetof(typeof(field), data), - (unsigned int)BUF_PAGE_SIZE, - (unsigned int)is_signed_type(char)); - - return !trace_seq_has_overflowed(s); -} - struct rb_irq_work { struct irq_work work; wait_queue_head_t waiters; @@ -463,27 +435,9 @@ enum { RB_CTX_MAX }; -#if BITS_PER_LONG == 32 -#define RB_TIME_32 -#endif - -/* To test on 
64 bit machines */ -//#define RB_TIME_32 - -#ifdef RB_TIME_32 - -struct rb_time_struct { - local_t cnt; - local_t top; - local_t bottom; - local_t msb; -}; -#else -#include struct rb_time_struct { local64_t time; }; -#endif typedef struct rb_time_struct rb_time_t; #define MAX_NEST 5 @@ -557,6 +511,10 @@ struct trace_buffer { struct rb_irq_work irq_work; bool time_stamp_abs; + + unsigned int subbuf_size; + unsigned int subbuf_order; + unsigned int max_data_size; }; struct ring_buffer_iter { @@ -570,150 +528,48 @@ struct ring_buffer_iter { u64 read_stamp; u64 page_stamp; struct ring_buffer_event *event; + size_t event_size; int missed_events; }; -#ifdef RB_TIME_32 - -/* - * On 32 bit machines, local64_t is very expensive. As the ring - * buffer doesn't need all the features of a true 64 bit atomic, - * on 32 bit, it uses these functions (64 still uses local64_t). - * - * For the ring buffer, 64 bit required operations for the time is - * the following: - * - * - Reads may fail if it interrupted a modification of the time stamp. - * It will succeed if it did not interrupt another write even if - * the read itself is interrupted by a write. - * It returns whether it was successful or not. - * - * - Writes always succeed and will overwrite other writes and writes - * that were done by events interrupting the current write. - * - * - A write followed by a read of the same time stamp will always succeed, - * but may not contain the same value. - * - * - A cmpxchg will fail if it interrupted another write or cmpxchg. - * Other than that, it acts like a normal cmpxchg. - * - * The 60 bit time stamp is broken up by 30 bits in a top and bottom half - * (bottom being the least significant 30 bits of the 60 bit time stamp). - * - * The two most significant bits of each half holds a 2 bit counter (0-3). - * Each update will increment this counter by one. 
- * When reading the top and bottom, if the two counter bits match then the - * top and bottom together make a valid 60 bit number. - */ -#define RB_TIME_SHIFT 30 -#define RB_TIME_VAL_MASK ((1 << RB_TIME_SHIFT) - 1) -#define RB_TIME_MSB_SHIFT 60 - -static inline int rb_time_cnt(unsigned long val) +int ring_buffer_print_page_header(struct trace_buffer *buffer, struct trace_seq *s) { - return (val >> RB_TIME_SHIFT) & 3; -} - -static inline u64 rb_time_val(unsigned long top, unsigned long bottom) -{ - u64 val; - - val = top & RB_TIME_VAL_MASK; - val <<= RB_TIME_SHIFT; - val |= bottom & RB_TIME_VAL_MASK; - - return val; -} - -static inline bool __rb_time_read(rb_time_t *t, u64 *ret, unsigned long *cnt) -{ - unsigned long top, bottom, msb; - unsigned long c; - - /* - * If the read is interrupted by a write, then the cnt will - * be different. Loop until both top and bottom have been read - * without interruption. - */ - do { - c = local_read(&t->cnt); - top = local_read(&t->top); - bottom = local_read(&t->bottom); - msb = local_read(&t->msb); - } while (c != local_read(&t->cnt)); - - *cnt = rb_time_cnt(top); - - /* If top, msb or bottom counts don't match, this interrupted a write */ - if (*cnt != rb_time_cnt(msb) || *cnt != rb_time_cnt(bottom)) - return false; - - /* The shift to msb will lose its cnt bits */ - *ret = rb_time_val(top, bottom) | ((u64)msb << RB_TIME_MSB_SHIFT); - return true; -} - -static bool rb_time_read(rb_time_t *t, u64 *ret) -{ - unsigned long cnt; - - return __rb_time_read(t, ret, &cnt); -} - -static inline unsigned long rb_time_val_cnt(unsigned long val, unsigned long cnt) -{ - return (val & RB_TIME_VAL_MASK) | ((cnt & 3) << RB_TIME_SHIFT); -} - -static inline void rb_time_split(u64 val, unsigned long *top, unsigned long *bottom, - unsigned long *msb) -{ - *top = (unsigned long)((val >> RB_TIME_SHIFT) & RB_TIME_VAL_MASK); - *bottom = (unsigned long)(val & RB_TIME_VAL_MASK); - *msb = (unsigned long)(val >> RB_TIME_MSB_SHIFT); -} + struct 
buffer_data_page field; -static inline void rb_time_val_set(local_t *t, unsigned long val, unsigned long cnt) -{ - val = rb_time_val_cnt(val, cnt); - local_set(t, val); -} + trace_seq_printf(s, "\tfield: u64 timestamp;\t" + "offset:0;\tsize:%u;\tsigned:%u;\n", + (unsigned int)sizeof(field.time_stamp), + (unsigned int)is_signed_type(u64)); -static void rb_time_set(rb_time_t *t, u64 val) -{ - unsigned long cnt, top, bottom, msb; + trace_seq_printf(s, "\tfield: local_t commit;\t" + "offset:%u;\tsize:%u;\tsigned:%u;\n", + (unsigned int)offsetof(typeof(field), commit), + (unsigned int)sizeof(field.commit), + (unsigned int)is_signed_type(long)); - rb_time_split(val, &top, &bottom, &msb); + trace_seq_printf(s, "\tfield: int overwrite;\t" + "offset:%u;\tsize:%u;\tsigned:%u;\n", + (unsigned int)offsetof(typeof(field), commit), + 1, + (unsigned int)is_signed_type(long)); - /* Writes always succeed with a valid number even if it gets interrupted. */ - do { - cnt = local_inc_return(&t->cnt); - rb_time_val_set(&t->top, top, cnt); - rb_time_val_set(&t->bottom, bottom, cnt); - rb_time_val_set(&t->msb, val >> RB_TIME_MSB_SHIFT, cnt); - } while (cnt != local_read(&t->cnt)); -} + trace_seq_printf(s, "\tfield: char data;\t" + "offset:%u;\tsize:%u;\tsigned:%u;\n", + (unsigned int)offsetof(typeof(field), data), + (unsigned int)buffer->subbuf_size, + (unsigned int)is_signed_type(char)); -static inline bool -rb_time_read_cmpxchg(local_t *l, unsigned long expect, unsigned long set) -{ - return local_try_cmpxchg(l, &expect, set); + return !trace_seq_has_overflowed(s); } -#else /* 64 bits */ - -/* local64_t always succeeds */ - -static inline bool rb_time_read(rb_time_t *t, u64 *ret) +static inline void rb_time_read(rb_time_t *t, u64 *ret) { *ret = local64_read(&t->time); - return true; } static void rb_time_set(rb_time_t *t, u64 val) { local64_set(&t->time, val); } -#endif /* * Enable this to make sure that the event passed to @@ -820,10 +676,7 @@ u64 ring_buffer_event_time_stamp(struct 
trace_buffer *buffer, WARN_ONCE(1, "nest (%d) greater than max", nest); fail: - /* Can only fail on 32 bit */ - if (!rb_time_read(&cpu_buffer->write_stamp, &ts)) - /* Screw it, just read the current time */ - ts = rb_time_stamp(cpu_buffer->buffer); + rb_time_read(&cpu_buffer->write_stamp, &ts); return ts; } @@ -1619,10 +1472,12 @@ static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, list_add(&bpage->list, pages); - page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu), mflags, 0); + page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu), mflags, + cpu_buffer->buffer->subbuf_order); if (!page) goto free_pages; bpage->page = page_address(page); + bpage->order = cpu_buffer->buffer->subbuf_order; rb_init_page(bpage->page); if (user_thread && fatal_signal_pending(current)) @@ -1701,7 +1556,8 @@ rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu) rb_check_bpage(cpu_buffer, bpage); cpu_buffer->reader_page = bpage; - page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, 0); + + page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, cpu_buffer->buffer->subbuf_order); if (!page) goto fail_free_reader; bpage->page = page_address(page); @@ -1784,7 +1640,14 @@ struct trace_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, if (!zalloc_cpumask_var(&buffer->cpumask, GFP_KERNEL)) goto fail_free_buffer; - nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); + /* Default buffer page size - one system page */ + buffer->subbuf_order = 0; + buffer->subbuf_size = PAGE_SIZE - BUF_PAGE_HDR_SIZE; + + /* Max payload is buffer page size - header (8bytes) */ + buffer->max_data_size = buffer->subbuf_size - (sizeof(u32) * 2); + + nr_pages = DIV_ROUND_UP(size, buffer->subbuf_size); buffer->flags = flags; buffer->clock = trace_clock_local; buffer->reader_lock_key = key; @@ -2103,7 +1966,7 @@ static void update_pages_handler(struct work_struct *work) * @size: the new size. * @cpu_id: the cpu buffer to resize * - * Minimum size is 2 * BUF_PAGE_SIZE. 
+ * Minimum size is 2 * buffer->subbuf_size. * * Returns 0 on success and < 0 on failure. */ @@ -2125,7 +1988,7 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size, !cpumask_test_cpu(cpu_id, buffer->cpumask)) return 0; - nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); + nr_pages = DIV_ROUND_UP(size, buffer->subbuf_size); /* we need a minimum of two pages */ if (nr_pages < 2) @@ -2372,7 +2235,7 @@ rb_iter_head_event(struct ring_buffer_iter *iter) */ barrier(); - if ((iter->head + length) > commit || length > BUF_PAGE_SIZE) + if ((iter->head + length) > commit || length > iter->event_size) /* Writer corrupted the read? */ goto reset; @@ -2412,11 +2275,13 @@ rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer) } static __always_inline unsigned -rb_event_index(struct ring_buffer_event *event) +rb_event_index(struct ring_buffer_per_cpu *cpu_buffer, struct ring_buffer_event *event) { unsigned long addr = (unsigned long)event; - return (addr & ~PAGE_MASK) - BUF_PAGE_HDR_SIZE; + addr &= (PAGE_SIZE << cpu_buffer->buffer->subbuf_order) - 1; + + return addr - BUF_PAGE_HDR_SIZE; } static void rb_inc_iter(struct ring_buffer_iter *iter) @@ -2605,6 +2470,7 @@ static inline void rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, unsigned long tail, struct rb_event_info *info) { + unsigned long bsize = READ_ONCE(cpu_buffer->buffer->subbuf_size); struct buffer_page *tail_page = info->tail_page; struct ring_buffer_event *event; unsigned long length = info->length; @@ -2613,13 +2479,13 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, * Only the event that crossed the page boundary * must fill the old tail_page with padding. */ - if (tail >= BUF_PAGE_SIZE) { + if (tail >= bsize) { /* * If the page was filled, then we still need * to update the real_end. Reset it to zero * and the reader will ignore it. 
*/ - if (tail == BUF_PAGE_SIZE) + if (tail == bsize) tail_page->real_end = 0; local_sub(length, &tail_page->write); @@ -2647,7 +2513,7 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, * If we are less than the minimum size, we don't need to * worry about it. */ - if (tail > (BUF_PAGE_SIZE - RB_EVNT_MIN_SIZE)) { + if (tail > (bsize - RB_EVNT_MIN_SIZE)) { /* No room for any events */ /* Mark the rest of the page with padding */ @@ -2662,19 +2528,19 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, } /* Put in a discarded event */ - event->array[0] = (BUF_PAGE_SIZE - tail) - RB_EVNT_HDR_SIZE; + event->array[0] = (bsize - tail) - RB_EVNT_HDR_SIZE; event->type_len = RINGBUF_TYPE_PADDING; /* time delta must be non zero */ event->time_delta = 1; /* account for padding bytes */ - local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes); + local_add(bsize - tail, &cpu_buffer->entries_bytes); /* Make sure the padding is visible before the tail_page->write update */ smp_wmb(); /* Set write to end of buffer */ - length = (tail + length) - BUF_PAGE_SIZE; + length = (tail + length) - bsize; local_sub(length, &tail_page->write); } @@ -2788,7 +2654,8 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, /* Slow path */ static struct ring_buffer_event * -rb_add_time_stamp(struct ring_buffer_event *event, u64 delta, bool abs) +rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, + struct ring_buffer_event *event, u64 delta, bool abs) { if (abs) event->type_len = RINGBUF_TYPE_TIME_STAMP; @@ -2796,7 +2663,7 @@ rb_add_time_stamp(struct ring_buffer_event *event, u64 delta, bool abs) event->type_len = RINGBUF_TYPE_TIME_EXTEND; /* Not the first event on the page, or not delta? 
*/ - if (abs || rb_event_index(event)) { + if (abs || rb_event_index(cpu_buffer, event)) { event->time_delta = delta & TS_MASK; event->array[0] = delta >> TS_SHIFT; } else { @@ -2826,7 +2693,7 @@ rb_check_timestamp(struct ring_buffer_per_cpu *cpu_buffer, (unsigned long long)info->ts, (unsigned long long)info->before, (unsigned long long)info->after, - (unsigned long long)(rb_time_read(&cpu_buffer->write_stamp, &write_stamp) ? write_stamp : 0), + (unsigned long long)({rb_time_read(&cpu_buffer->write_stamp, &write_stamp); write_stamp;}), sched_clock_stable() ? "" : "If you just came from a suspend/resume,\n" "please switch to the trace global clock:\n" @@ -2870,7 +2737,7 @@ static void rb_add_timestamp(struct ring_buffer_per_cpu *cpu_buffer, if (!abs) info->delta = 0; } - *event = rb_add_time_stamp(*event, info->delta, abs); + *event = rb_add_time_stamp(cpu_buffer, *event, info->delta, abs); *length -= RB_LEN_TIME_EXTEND; *delta = 0; } @@ -2954,10 +2821,10 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, struct buffer_page *bpage; unsigned long addr; - new_index = rb_event_index(event); + new_index = rb_event_index(cpu_buffer, event); old_index = new_index + rb_event_ts_length(event); addr = (unsigned long)event; - addr &= PAGE_MASK; + addr &= ~((PAGE_SIZE << cpu_buffer->buffer->subbuf_order) - 1); bpage = READ_ONCE(cpu_buffer->tail_page); @@ -3344,6 +3211,76 @@ EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit); #define CHECK_FULL_PAGE 1L #ifdef CONFIG_RING_BUFFER_VALIDATE_TIME_DELTAS + +static const char *show_irq_str(int bits) +{ + const char *type[] = { + ".", // 0 + "s", // 1 + "h", // 2 + "Hs", // 3 + "n", // 4 + "Ns", // 5 + "Nh", // 6 + "NHs", // 7 + }; + + return type[bits]; +} + +/* Assume this is a trace event */ +static const char *show_flags(struct ring_buffer_event *event) +{ + struct trace_entry *entry; + int bits = 0; + + if (rb_event_data_length(event) - RB_EVNT_HDR_SIZE < sizeof(*entry)) + return "X"; + + entry = 
ring_buffer_event_data(event); + + if (entry->flags & TRACE_FLAG_SOFTIRQ) + bits |= 1; + + if (entry->flags & TRACE_FLAG_HARDIRQ) + bits |= 2; + + if (entry->flags & TRACE_FLAG_NMI) + bits |= 4; + + return show_irq_str(bits); +} + +static const char *show_irq(struct ring_buffer_event *event) +{ + struct trace_entry *entry; + + if (rb_event_data_length(event) - RB_EVNT_HDR_SIZE < sizeof(*entry)) + return ""; + + entry = ring_buffer_event_data(event); + if (entry->flags & TRACE_FLAG_IRQS_OFF) + return "d"; + return ""; +} + +static const char *show_interrupt_level(void) +{ + unsigned long pc = preempt_count(); + unsigned char level = 0; + + if (pc & SOFTIRQ_OFFSET) + level |= 1; + + if (pc & HARDIRQ_MASK) + level |= 2; + + if (pc & NMI_MASK) + level |= 4; + + return show_irq_str(level); +} + static void dump_buffer_page(struct buffer_data_page *bpage, struct rb_event_info *info, unsigned long tail) @@ -3364,34 +3301,57 @@ static void dump_buffer_page(struct buffer_data_page *bpage, case RINGBUF_TYPE_TIME_EXTEND: delta = rb_event_time_stamp(event); ts += delta; - pr_warn(" [%lld] delta:%lld TIME EXTEND\n", ts, delta); + pr_warn(" 0x%x: [%lld] delta:%lld TIME EXTEND\n", + e, ts, delta); break; case RINGBUF_TYPE_TIME_STAMP: delta = rb_event_time_stamp(event); ts = rb_fix_abs_ts(delta, ts); - pr_warn(" [%lld] absolute:%lld TIME STAMP\n", ts, delta); + pr_warn(" 0x%x: [%lld] absolute:%lld TIME STAMP\n", + e, ts, delta); break; case RINGBUF_TYPE_PADDING: ts += event->time_delta; - pr_warn(" [%lld] delta:%d PADDING\n", ts, event->time_delta); + pr_warn(" 0x%x: [%lld] delta:%d PADDING\n", + e, ts, event->time_delta); break; case RINGBUF_TYPE_DATA: ts += event->time_delta; - pr_warn(" [%lld] delta:%d\n", ts, event->time_delta); + pr_warn(" 0x%x: [%lld] delta:%d %s%s\n", + e, ts, event->time_delta, + show_flags(event), show_irq(event)); break; default: break; } } + pr_warn("expected end:0x%lx last event actually ended at:0x%x\n", tail, e); } static DEFINE_PER_CPU(atomic_t, 
checking); static atomic_t ts_dump; +#define buffer_warn_return(fmt, ...) \ + do { \ + /* If another report is happening, ignore this one */ \ + if (atomic_inc_return(&ts_dump) != 1) { \ + atomic_dec(&ts_dump); \ + goto out; \ + } \ + atomic_inc(&cpu_buffer->record_disabled); \ + pr_warn(fmt, ##__VA_ARGS__); \ + dump_buffer_page(bpage, info, tail); \ + atomic_dec(&ts_dump); \ + /* There's some cases in boot up that this can happen */ \ + if (WARN_ON_ONCE(system_state != SYSTEM_BOOTING)) \ + /* Do not re-enable checking */ \ + return; \ + } while (0) + /* * Check if the current event time stamp matches the deltas on * the buffer page. @@ -3445,7 +3405,12 @@ static void check_buffer(struct ring_buffer_per_cpu *cpu_buffer, case RINGBUF_TYPE_TIME_STAMP: delta = rb_event_time_stamp(event); - ts = rb_fix_abs_ts(delta, ts); + delta = rb_fix_abs_ts(delta, ts); + if (delta < ts) { + buffer_warn_return("[CPU: %d]ABSOLUTE TIME WENT BACKWARDS: last ts: %lld absolute ts: %lld\n", + cpu_buffer->cpu, ts, delta); + } + ts = delta; break; case RINGBUF_TYPE_PADDING: @@ -3462,23 +3427,11 @@ static void check_buffer(struct ring_buffer_per_cpu *cpu_buffer, } if ((full && ts > info->ts) || (!full && ts + info->delta != info->ts)) { - /* If another report is happening, ignore this one */ - if (atomic_inc_return(&ts_dump) != 1) { - atomic_dec(&ts_dump); - goto out; - } - atomic_inc(&cpu_buffer->record_disabled); - /* There's some cases in boot up that this can happen */ - WARN_ON_ONCE(system_state != SYSTEM_BOOTING); - pr_warn("[CPU: %d]TIME DOES NOT MATCH expected:%lld actual:%lld delta:%lld before:%lld after:%lld%s\n", - cpu_buffer->cpu, - ts + info->delta, info->ts, info->delta, - info->before, info->after, - full ? 
" (full)" : ""); - dump_buffer_page(bpage, info, tail); - atomic_dec(&ts_dump); - /* Do not re-enable checking */ - return; + buffer_warn_return("[CPU: %d]TIME DOES NOT MATCH expected:%lld actual:%lld delta:%lld before:%lld after:%lld%s context:%s\n", + cpu_buffer->cpu, + ts + info->delta, info->ts, info->delta, + info->before, info->after, + full ? " (full)" : "", show_interrupt_level()); } out: atomic_dec(this_cpu_ptr(&checking)); @@ -3498,16 +3451,14 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, struct ring_buffer_event *event; struct buffer_page *tail_page; unsigned long tail, write, w; - bool a_ok; - bool b_ok; /* Don't let the compiler play games with cpu_buffer->tail_page */ tail_page = info->tail_page = READ_ONCE(cpu_buffer->tail_page); /*A*/ w = local_read(&tail_page->write) & RB_WRITE_MASK; barrier(); - b_ok = rb_time_read(&cpu_buffer->before_stamp, &info->before); - a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after); + rb_time_read(&cpu_buffer->before_stamp, &info->before); + rb_time_read(&cpu_buffer->write_stamp, &info->after); barrier(); info->ts = rb_time_stamp(cpu_buffer->buffer); @@ -3522,7 +3473,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, if (!w) { /* Use the sub-buffer timestamp */ info->delta = 0; - } else if (unlikely(!a_ok || !b_ok || info->before != info->after)) { + } else if (unlikely(info->before != info->after)) { info->add_timestamp |= RB_ADD_STAMP_FORCE | RB_ADD_STAMP_EXTEND; info->length += RB_LEN_TIME_EXTEND; } else { @@ -3544,7 +3495,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, tail = write - info->length; /* See if we shot pass the end of this buffer page */ - if (unlikely(write > BUF_PAGE_SIZE)) { + if (unlikely(write > cpu_buffer->buffer->subbuf_size)) { check_buffer(cpu_buffer, info, CHECK_FULL_PAGE); return rb_move_tail(cpu_buffer, tail, info); } @@ -3571,8 +3522,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, /* SLOW PATH - Interrupted between A and C */ 
/* Save the old before_stamp */ - a_ok = rb_time_read(&cpu_buffer->before_stamp, &info->before); - RB_WARN_ON(cpu_buffer, !a_ok); + rb_time_read(&cpu_buffer->before_stamp, &info->before); /* * Read a new timestamp and update the before_stamp to make @@ -3584,9 +3534,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, rb_time_set(&cpu_buffer->before_stamp, ts); barrier(); - /*E*/ a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after); - /* Was interrupted before here, write_stamp must be valid */ - RB_WARN_ON(cpu_buffer, !a_ok); + /*E*/ rb_time_read(&cpu_buffer->write_stamp, &info->after); barrier(); /*F*/ if (write == (local_read(&tail_page->write) & RB_WRITE_MASK) && info->after == info->before && info->after < ts) { @@ -3678,7 +3626,7 @@ rb_reserve_next_event(struct trace_buffer *buffer, if (ring_buffer_time_stamp_abs(cpu_buffer->buffer)) { add_ts_default = RB_ADD_STAMP_ABSOLUTE; info.length += RB_LEN_TIME_EXTEND; - if (info.length > BUF_MAX_DATA_SIZE) + if (info.length > cpu_buffer->buffer->max_data_size) goto out_fail; } else { add_ts_default = RB_ADD_STAMP_NONE; @@ -3753,7 +3701,7 @@ ring_buffer_lock_reserve(struct trace_buffer *buffer, unsigned long length) if (unlikely(atomic_read(&cpu_buffer->record_disabled))) goto out; - if (unlikely(length > BUF_MAX_DATA_SIZE)) + if (unlikely(length > buffer->max_data_size)) goto out; if (unlikely(trace_recursive_lock(cpu_buffer))) @@ -3787,7 +3735,7 @@ rb_decrement_entry(struct ring_buffer_per_cpu *cpu_buffer, struct buffer_page *bpage = cpu_buffer->commit_page; struct buffer_page *start; - addr &= PAGE_MASK; + addr &= ~((PAGE_SIZE << cpu_buffer->buffer->subbuf_order) - 1); /* Do the likely case first */ if (likely(bpage->page == (void *)addr)) { @@ -3903,7 +3851,7 @@ int ring_buffer_write(struct trace_buffer *buffer, if (atomic_read(&cpu_buffer->record_disabled)) goto out; - if (length > BUF_MAX_DATA_SIZE) + if (length > buffer->max_data_size) goto out; if (unlikely(trace_recursive_lock(cpu_buffer))) @@ 
-4483,6 +4431,7 @@ static struct buffer_page * rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) { struct buffer_page *reader = NULL; + unsigned long bsize = READ_ONCE(cpu_buffer->buffer->subbuf_size); unsigned long overwrite; unsigned long flags; int nr_loops = 0; @@ -4618,7 +4567,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) #define USECS_WAIT 1000000 for (nr_loops = 0; nr_loops < USECS_WAIT; nr_loops++) { /* If the write is past the end of page, a writer is still updating it */ - if (likely(!reader || rb_page_write(reader) <= BUF_PAGE_SIZE)) + if (likely(!reader || rb_page_write(reader) <= bsize)) break; udelay(1); @@ -5062,7 +5011,8 @@ ring_buffer_read_prepare(struct trace_buffer *buffer, int cpu, gfp_t flags) return NULL; /* Holds the entire event: data and meta data */ - iter->event = kmalloc(BUF_PAGE_SIZE, flags); + iter->event_size = buffer->subbuf_size; + iter->event = kmalloc(iter->event_size, flags); if (!iter->event) { kfree(iter); return NULL; @@ -5178,19 +5128,28 @@ EXPORT_SYMBOL_GPL(ring_buffer_iter_advance); */ unsigned long ring_buffer_size(struct trace_buffer *buffer, int cpu) { - /* - * Earlier, this method returned - * BUF_PAGE_SIZE * buffer->nr_pages - * Since the nr_pages field is now removed, we have converted this to - * return the per cpu buffer value. - */ if (!cpumask_test_cpu(cpu, buffer->cpumask)) return 0; - return BUF_PAGE_SIZE * buffer->buffers[cpu]->nr_pages; + return buffer->subbuf_size * buffer->buffers[cpu]->nr_pages; } EXPORT_SYMBOL_GPL(ring_buffer_size); +/** + * ring_buffer_max_event_size - return the max data size of an event + * @buffer: The ring buffer. + * + * Returns the maximum size an event can be. 
+ */ +unsigned long ring_buffer_max_event_size(struct trace_buffer *buffer) +{ + /* If abs timestamp is requested, events have a timestamp too */ + if (ring_buffer_time_stamp_abs(buffer)) + return buffer->max_data_size - RB_LEN_TIME_EXTEND; + return buffer->max_data_size; +} +EXPORT_SYMBOL_GPL(ring_buffer_max_event_size); + static void rb_clear_buffer_page(struct buffer_page *page) { local_set(&page->write, 0); @@ -5461,6 +5420,9 @@ int ring_buffer_swap_cpu(struct trace_buffer *buffer_a, if (cpu_buffer_a->nr_pages != cpu_buffer_b->nr_pages) goto out; + if (buffer_a->subbuf_order != buffer_b->subbuf_order) + goto out; + ret = -EAGAIN; if (atomic_read(&buffer_a->record_disabled)) @@ -5532,40 +5494,48 @@ EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); * Returns: * The page allocated, or ERR_PTR */ -void *ring_buffer_alloc_read_page(struct trace_buffer *buffer, int cpu) +struct buffer_data_read_page * +ring_buffer_alloc_read_page(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; - struct buffer_data_page *bpage = NULL; + struct buffer_data_read_page *bpage = NULL; unsigned long flags; struct page *page; if (!cpumask_test_cpu(cpu, buffer->cpumask)) return ERR_PTR(-ENODEV); + bpage = kzalloc(sizeof(*bpage), GFP_KERNEL); + if (!bpage) + return ERR_PTR(-ENOMEM); + + bpage->order = buffer->subbuf_order; cpu_buffer = buffer->buffers[cpu]; local_irq_save(flags); arch_spin_lock(&cpu_buffer->lock); if (cpu_buffer->free_page) { - bpage = cpu_buffer->free_page; + bpage->data = cpu_buffer->free_page; cpu_buffer->free_page = NULL; } arch_spin_unlock(&cpu_buffer->lock); local_irq_restore(flags); - if (bpage) + if (bpage->data) goto out; - page = alloc_pages_node(cpu_to_node(cpu), - GFP_KERNEL | __GFP_NORETRY, 0); - if (!page) + page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL | __GFP_NORETRY, + cpu_buffer->buffer->subbuf_order); + if (!page) { + kfree(bpage); return ERR_PTR(-ENOMEM); + } - bpage = page_address(page); + bpage->data = page_address(page); 
out: - rb_init_page(bpage); + rb_init_page(bpage->data); return bpage; } @@ -5575,14 +5545,15 @@ EXPORT_SYMBOL_GPL(ring_buffer_alloc_read_page); * ring_buffer_free_read_page - free an allocated read page * @buffer: the buffer the page was allocate for * @cpu: the cpu buffer the page came from - * @data: the page to free + * @data_page: the page to free * * Free a page allocated from ring_buffer_alloc_read_page. */ -void ring_buffer_free_read_page(struct trace_buffer *buffer, int cpu, void *data) +void ring_buffer_free_read_page(struct trace_buffer *buffer, int cpu, + struct buffer_data_read_page *data_page) { struct ring_buffer_per_cpu *cpu_buffer; - struct buffer_data_page *bpage = data; + struct buffer_data_page *bpage = data_page->data; struct page *page = virt_to_page(bpage); unsigned long flags; @@ -5591,8 +5562,12 @@ void ring_buffer_free_read_page(struct trace_buffer *buffer, int cpu, void *data cpu_buffer = buffer->buffers[cpu]; - /* If the page is still in use someplace else, we can't reuse it */ - if (page_ref_count(page) > 1) + /* + * If the page is still in use someplace else, or order of the page + * is different from the subbuffer order of the buffer - + * we can't reuse it + */ + if (page_ref_count(page) > 1 || data_page->order != buffer->subbuf_order) goto out; local_irq_save(flags); @@ -5607,7 +5582,8 @@ void ring_buffer_free_read_page(struct trace_buffer *buffer, int cpu, void *data local_irq_restore(flags); out: - free_page((unsigned long)bpage); + free_pages((unsigned long)bpage, data_page->order); + kfree(data_page); } EXPORT_SYMBOL_GPL(ring_buffer_free_read_page); @@ -5628,9 +5604,10 @@ EXPORT_SYMBOL_GPL(ring_buffer_free_read_page); * rpage = ring_buffer_alloc_read_page(buffer, cpu); * if (IS_ERR(rpage)) * return PTR_ERR(rpage); - * ret = ring_buffer_read_page(buffer, &rpage, len, cpu, 0); + * ret = ring_buffer_read_page(buffer, rpage, len, cpu, 0); * if (ret >= 0) - * process_page(rpage, ret); + * 
process_page(ring_buffer_read_page_data(rpage), ret); + * ring_buffer_free_read_page(buffer, cpu, rpage); * * When @full is set, the function will not return true unless * the writer is off the reader page. @@ -5645,7 +5622,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_free_read_page); * <0 if no data has been transferred. */ int ring_buffer_read_page(struct trace_buffer *buffer, - void **data_page, size_t len, int cpu, int full) + struct buffer_data_read_page *data_page, + size_t len, int cpu, int full) { struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; struct ring_buffer_event *event; @@ -5670,10 +5648,12 @@ int ring_buffer_read_page(struct trace_buffer *buffer, len -= BUF_PAGE_HDR_SIZE; - if (!data_page) + if (!data_page || !data_page->data) + goto out; + if (data_page->order != buffer->subbuf_order) goto out; - bpage = *data_page; + bpage = data_page->data; if (!bpage) goto out; @@ -5767,11 +5747,11 @@ int ring_buffer_read_page(struct trace_buffer *buffer, /* swap the pages */ rb_init_page(bpage); bpage = reader->page; - reader->page = *data_page; + reader->page = data_page->data; local_set(&reader->write, 0); local_set(&reader->entries, 0); reader->read = 0; - *data_page = bpage; + data_page->data = bpage; /* * Use the real_end for the data size, @@ -5793,7 +5773,7 @@ int ring_buffer_read_page(struct trace_buffer *buffer, /* If there is room at the end of the page to save the * missed events, then record it there. */ - if (BUF_PAGE_SIZE - commit >= sizeof(missed_events)) { + if (buffer->subbuf_size - commit >= sizeof(missed_events)) { memcpy(&bpage->data[commit], &missed_events, sizeof(missed_events)); local_add(RB_MISSED_STORED, &bpage->commit); @@ -5805,8 +5785,8 @@ int ring_buffer_read_page(struct trace_buffer *buffer, /* * This page may be off to user land. Zero it out here. 
*/ - if (commit < BUF_PAGE_SIZE) - memset(&bpage->data[commit], 0, BUF_PAGE_SIZE - commit); + if (commit < buffer->subbuf_size) + memset(&bpage->data[commit], 0, buffer->subbuf_size - commit); out_unlock: raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); @@ -5816,6 +5796,209 @@ int ring_buffer_read_page(struct trace_buffer *buffer, } EXPORT_SYMBOL_GPL(ring_buffer_read_page); +/** + * ring_buffer_read_page_data - get pointer to the data in the page. + * @page: the page to get the data from + * + * Returns pointer to the actual data in this page. + */ +void *ring_buffer_read_page_data(struct buffer_data_read_page *page) +{ + return page->data; +} +EXPORT_SYMBOL_GPL(ring_buffer_read_page_data); + +/** + * ring_buffer_subbuf_size_get - get size of the sub buffer. + * @buffer: the buffer to get the sub buffer size from + * + * Returns size of the sub buffer, in bytes. + */ +int ring_buffer_subbuf_size_get(struct trace_buffer *buffer) +{ + return buffer->subbuf_size + BUF_PAGE_HDR_SIZE; +} +EXPORT_SYMBOL_GPL(ring_buffer_subbuf_size_get); + +/** + * ring_buffer_subbuf_order_get - get order of system sub pages in one buffer page. + * @buffer: The ring_buffer to get the system sub page order from + * + * By default, one ring buffer sub page equals to one system page. This parameter + * is configurable, per ring buffer. The size of the ring buffer sub page can be + * extended, but must be an order of system page size. + * + * Returns the order of buffer sub page size, in system pages: + * 0 means the sub buffer size is 1 system page and so forth. + * In case of an error < 0 is returned. + */ +int ring_buffer_subbuf_order_get(struct trace_buffer *buffer) +{ + if (!buffer) + return -EINVAL; + + return buffer->subbuf_order; +} +EXPORT_SYMBOL_GPL(ring_buffer_subbuf_order_get); + +/** + * ring_buffer_subbuf_order_set - set the size of ring buffer sub page. + * @buffer: The ring_buffer to set the new page size. 
+ * @order: Order of the system pages in one sub buffer page + * + * By default, one ring buffer page equals one system page. This API can be + * used to set new size of the ring buffer page. The size must be order of + * system page size, that's why the input parameter @order is the order of + * system pages that are allocated for one ring buffer page: + * 0 - 1 system page + * 1 - 2 system pages + * 2 - 4 system pages + * ... + * + * Returns 0 on success or < 0 in case of an error. + */ +int ring_buffer_subbuf_order_set(struct trace_buffer *buffer, int order) +{ + struct ring_buffer_per_cpu *cpu_buffer; + struct buffer_page *bpage, *tmp; + int old_order, old_size; + int nr_pages; + int psize; + int err; + int cpu; + + if (!buffer || order < 0) + return -EINVAL; + + if (buffer->subbuf_order == order) + return 0; + + psize = (1 << order) * PAGE_SIZE; + if (psize <= BUF_PAGE_HDR_SIZE) + return -EINVAL; + + old_order = buffer->subbuf_order; + old_size = buffer->subbuf_size; + + /* prevent another thread from changing buffer sizes */ + mutex_lock(&buffer->mutex); + atomic_inc(&buffer->record_disabled); + + /* Make sure all commits have finished */ + synchronize_rcu(); + + buffer->subbuf_order = order; + buffer->subbuf_size = psize - BUF_PAGE_HDR_SIZE; + + /* Make sure all new buffers are allocated, before deleting the old ones */ + for_each_buffer_cpu(buffer, cpu) { + + if (!cpumask_test_cpu(cpu, buffer->cpumask)) + continue; + + cpu_buffer = buffer->buffers[cpu]; + + /* Update the number of pages to match the new size */ + nr_pages = old_size * buffer->buffers[cpu]->nr_pages; + nr_pages = DIV_ROUND_UP(nr_pages, buffer->subbuf_size); + + /* we need a minimum of two pages */ + if (nr_pages < 2) + nr_pages = 2; + + cpu_buffer->nr_pages_to_update = nr_pages; + + /* Include the reader page */ + nr_pages++; + + /* Allocate the new size buffer */ + INIT_LIST_HEAD(&cpu_buffer->new_pages); + if (__rb_allocate_pages(cpu_buffer, nr_pages, + &cpu_buffer->new_pages)) { + /* 
not enough memory for new pages */ + err = -ENOMEM; + goto error; + } + } + + for_each_buffer_cpu(buffer, cpu) { + + if (!cpumask_test_cpu(cpu, buffer->cpumask)) + continue; + + cpu_buffer = buffer->buffers[cpu]; + + /* Clear the head bit to make the link list normal to read */ + rb_head_page_deactivate(cpu_buffer); + + /* Now walk the list and free all the old sub buffers */ + list_for_each_entry_safe(bpage, tmp, cpu_buffer->pages, list) { + list_del_init(&bpage->list); + free_buffer_page(bpage); + } + /* The above loop stopped at the last page needing to be freed */ + bpage = list_entry(cpu_buffer->pages, struct buffer_page, list); + free_buffer_page(bpage); + + /* Free the current reader page */ + free_buffer_page(cpu_buffer->reader_page); + + /* One page was allocated for the reader page */ + cpu_buffer->reader_page = list_entry(cpu_buffer->new_pages.next, + struct buffer_page, list); + list_del_init(&cpu_buffer->reader_page->list); + + /* The cpu_buffer pages are a link list with no head */ + cpu_buffer->pages = cpu_buffer->new_pages.next; + cpu_buffer->new_pages.next->prev = cpu_buffer->new_pages.prev; + cpu_buffer->new_pages.prev->next = cpu_buffer->new_pages.next; + + /* Clear the new_pages list */ + INIT_LIST_HEAD(&cpu_buffer->new_pages); + + cpu_buffer->head_page + = list_entry(cpu_buffer->pages, struct buffer_page, list); + cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page; + + cpu_buffer->nr_pages = cpu_buffer->nr_pages_to_update; + cpu_buffer->nr_pages_to_update = 0; + + free_pages((unsigned long)cpu_buffer->free_page, old_order); + cpu_buffer->free_page = NULL; + + rb_head_page_activate(cpu_buffer); + + rb_check_pages(cpu_buffer); + } + + atomic_dec(&buffer->record_disabled); + mutex_unlock(&buffer->mutex); + + return 0; + +error: + buffer->subbuf_order = old_order; + buffer->subbuf_size = old_size; + + atomic_dec(&buffer->record_disabled); + mutex_unlock(&buffer->mutex); + + for_each_buffer_cpu(buffer, cpu) { + cpu_buffer = 
buffer->buffers[cpu]; + + if (!cpu_buffer->nr_pages_to_update) + continue; + + list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages, list) { + list_del_init(&bpage->list); + free_buffer_page(bpage); + } + } + + return err; +} +EXPORT_SYMBOL_GPL(ring_buffer_subbuf_order_set); + /* * We only allocate new buffers, never free them if the CPU goes down. * If we were to free the buffer, then the user would lose any trace that was in diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index aef34673d79d08bc8adcf228f212e054d813a5e5..008187ebd7fe601d6e09a431cb79d606679aad43 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c @@ -104,10 +104,11 @@ static enum event_status read_event(int cpu) static enum event_status read_page(int cpu) { + struct buffer_data_read_page *bpage; struct ring_buffer_event *event; struct rb_page *rpage; unsigned long commit; - void *bpage; + int page_size; int *entry; int ret; int inc; @@ -117,14 +118,15 @@ static enum event_status read_page(int cpu) if (IS_ERR(bpage)) return EVENT_DROPPED; - ret = ring_buffer_read_page(buffer, &bpage, PAGE_SIZE, cpu, 1); + page_size = ring_buffer_subbuf_size_get(buffer); + ret = ring_buffer_read_page(buffer, bpage, page_size, cpu, 1); if (ret >= 0) { - rpage = bpage; + rpage = ring_buffer_read_page_data(bpage); /* The commit may have missed event flags set, clear them */ commit = local_read(&rpage->commit) & 0xfffff; for (i = 0; i < commit && !test_error ; i += inc) { - if (i >= (PAGE_SIZE - offsetof(struct rb_page, data))) { + if (i >= (page_size - offsetof(struct rb_page, data))) { TEST_ERROR(); break; } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a0defe156b57109c18b5e8a6fbcc639d9c409536..2a7c6fd934e9cb391b5ddf589748c572810ad6d1 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1263,10 +1263,17 @@ static void set_buffer_entries(struct array_buffer *buf, unsigned long val); int 
tracing_alloc_snapshot_instance(struct trace_array *tr) { + int order; int ret; if (!tr->allocated_snapshot) { + /* Make the snapshot buffer have the same order as main buffer */ + order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer); + ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order); + if (ret < 0) + return ret; + /* allocate spare buffer */ ret = resize_buffer_duplicate_size(&tr->max_buffer, &tr->array_buffer, RING_BUFFER_ALL_CPUS); @@ -1286,6 +1293,7 @@ static void free_snapshot(struct trace_array *tr) * The max_tr ring buffer has some state (e.g. ring->clock) and * we want preserve it. */ + ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0); ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS); set_buffer_entries(&tr->max_buffer, 1); tracing_reset_online_cpus(&tr->max_buffer); @@ -3767,7 +3775,7 @@ static bool trace_safe_str(struct trace_iterator *iter, const char *str, /* OK if part of the temp seq buffer */ if ((addr >= (unsigned long)iter->tmp_seq.buffer) && - (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE)) + (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE)) return true; /* Core rodata can not be freed */ @@ -5032,7 +5040,7 @@ static int tracing_release(struct inode *inode, struct file *file) return 0; } -static int tracing_release_generic_tr(struct inode *inode, struct file *file) +int tracing_release_generic_tr(struct inode *inode, struct file *file) { struct trace_array *tr = inode->i_private; @@ -6946,8 +6954,8 @@ waitagain: goto out; } - if (cnt >= PAGE_SIZE) - cnt = PAGE_SIZE - 1; + if (cnt >= TRACE_SEQ_BUFFER_SIZE) + cnt = TRACE_SEQ_BUFFER_SIZE - 1; /* reset all but tr, trace, and overruns */ trace_iterator_reset(iter); @@ -7292,8 +7300,9 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, enum event_trigger_type tt = ETT_NONE; struct trace_buffer *buffer; struct print_entry *entry; + int meta_size; ssize_t written; - int size; + size_t size; int len; /* Used in 
tracing_mark_raw_write() as well */ @@ -7306,23 +7315,44 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, if (!(tr->trace_flags & TRACE_ITER_MARKERS)) return -EINVAL; - if (cnt > TRACE_BUF_SIZE) - cnt = TRACE_BUF_SIZE; - - BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE); + if ((ssize_t)cnt < 0) + return -EINVAL; - size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */ + meta_size = sizeof(*entry) + 2; /* add '\0' and possible '\n' */ + again: + size = cnt + meta_size; /* If less than "<faulted>", then make sure we can still add that */ if (cnt < FAULTED_SIZE) size += FAULTED_SIZE - cnt; + if (size > TRACE_SEQ_BUFFER_SIZE) { + cnt -= size - TRACE_SEQ_BUFFER_SIZE; + goto again; + } + buffer = tr->array_buffer.buffer; event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, tracing_gen_ctx()); - if (unlikely(!event)) + if (unlikely(!event)) { + /* + * If the size was greater than what was allowed, then + * make it smaller and try again. + */ + if (size > ring_buffer_max_event_size(buffer)) { + /* cnt < FAULTED size should never be bigger than max */ + if (WARN_ON_ONCE(cnt < FAULTED_SIZE)) + return -EBADF; + cnt = ring_buffer_max_event_size(buffer) - meta_size; + /* The above should only happen once */ + if (WARN_ON_ONCE(cnt + meta_size == size)) + return -EBADF; + goto again; + } + /* Ring buffer disabled, return as if not open for write */ return -EBADF; + } entry = ring_buffer_event_data(event); entry->ip = _THIS_IP_; @@ -7357,9 +7387,6 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, return written; } -/* Limit it for now to 3K (including tag) */ -#define RAW_DATA_MAX_SIZE (1024*3) - static ssize_t tracing_mark_raw_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *fpos) @@ -7381,19 +7408,18 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf, return -EINVAL; /* The marker must at least have a tag id */ - if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE) + if (cnt < sizeof(unsigned 
int)) return -EINVAL; - if (cnt > TRACE_BUF_SIZE) - cnt = TRACE_BUF_SIZE; - - BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE); - size = sizeof(*entry) + cnt; if (cnt < FAULT_SIZE_ID) size += FAULT_SIZE_ID - cnt; buffer = tr->array_buffer.buffer; + + if (size > ring_buffer_max_event_size(buffer)) + return -EINVAL; + event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size, tracing_gen_ctx()); if (!event) @@ -7578,6 +7604,7 @@ struct ftrace_buffer_info { struct trace_iterator iter; void *spare; unsigned int spare_cpu; + unsigned int spare_size; unsigned int read; }; @@ -8282,6 +8309,8 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, { struct ftrace_buffer_info *info = filp->private_data; struct trace_iterator *iter = &info->iter; + void *trace_data; + int page_size; ssize_t ret = 0; ssize_t size; @@ -8293,6 +8322,17 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, return -EBUSY; #endif + page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer); + + /* Make sure the spare matches the current sub buffer size */ + if (info->spare) { + if (page_size != info->spare_size) { + ring_buffer_free_read_page(iter->array_buffer->buffer, + info->spare_cpu, info->spare); + info->spare = NULL; + } + } + if (!info->spare) { info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer, iter->cpu_file); @@ -8301,19 +8341,20 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, info->spare = NULL; } else { info->spare_cpu = iter->cpu_file; + info->spare_size = page_size; } } if (!info->spare) return ret; /* Do we have previous read data to read? 
*/ - if (info->read < PAGE_SIZE) + if (info->read < page_size) goto read; again: trace_access_lock(iter->cpu_file); ret = ring_buffer_read_page(iter->array_buffer->buffer, - &info->spare, + info->spare, count, iter->cpu_file, 0); trace_access_unlock(iter->cpu_file); @@ -8334,11 +8375,11 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, info->read = 0; read: - size = PAGE_SIZE - info->read; + size = page_size - info->read; if (size > count) size = count; - - ret = copy_to_user(ubuf, info->spare + info->read, size); + trace_data = ring_buffer_read_page_data(info->spare); + ret = copy_to_user(ubuf, trace_data + info->read, size); if (ret == size) return -EFAULT; @@ -8449,6 +8490,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, .spd_release = buffer_spd_release, }; struct buffer_ref *ref; + int page_size; int entries, i; ssize_t ret = 0; @@ -8457,13 +8499,14 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, return -EBUSY; #endif - if (*ppos & (PAGE_SIZE - 1)) + page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer); + if (*ppos & (page_size - 1)) return -EINVAL; - if (len & (PAGE_SIZE - 1)) { - if (len < PAGE_SIZE) + if (len & (page_size - 1)) { + if (len < page_size) return -EINVAL; - len &= PAGE_MASK; + len &= (~(page_size - 1)); } if (splice_grow_spd(pipe, &spd)) @@ -8473,7 +8516,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, trace_access_lock(iter->cpu_file); entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file); - for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) { + for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) { struct page *page; int r; @@ -8494,7 +8537,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, } ref->cpu = iter->cpu_file; - r = ring_buffer_read_page(ref->buffer, &ref->page, + r = ring_buffer_read_page(ref->buffer, ref->page, len, iter->cpu_file, 1); if (r < 0) { 
ring_buffer_free_read_page(ref->buffer, ref->cpu, @@ -8503,14 +8546,14 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, break; } - page = virt_to_page(ref->page); + page = virt_to_page(ring_buffer_read_page_data(ref->page)); spd.pages[i] = page; - spd.partial[i].len = PAGE_SIZE; + spd.partial[i].len = page_size; spd.partial[i].offset = 0; spd.partial[i].private = (unsigned long)ref; spd.nr_pages++; - *ppos += PAGE_SIZE; + *ppos += page_size; entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file); } @@ -9354,6 +9397,103 @@ static const struct file_operations buffer_percent_fops = { .llseek = default_llseek, }; +static ssize_t +buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) +{ + struct trace_array *tr = filp->private_data; + size_t size; + char buf[64]; + int order; + int r; + + order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer); + size = (PAGE_SIZE << order) / 1024; + + r = sprintf(buf, "%zd\n", size); + + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); +} + +static ssize_t +buffer_subbuf_size_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct trace_array *tr = filp->private_data; + unsigned long val; + int old_order; + int order; + int pages; + int ret; + + ret = kstrtoul_from_user(ubuf, cnt, 10, &val); + if (ret) + return ret; + + val *= 1024; /* value passed in is in KB */ + + pages = DIV_ROUND_UP(val, PAGE_SIZE); + order = fls(pages - 1); + + /* limit between 1 and 128 system pages */ + if (order < 0 || order > 7) + return -EINVAL; + + /* Do not allow tracing while changing the order of the ring buffer */ + tracing_stop_tr(tr); + + old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer); + if (old_order == order) + goto out; + + ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order); + if (ret) + goto out; + +#ifdef CONFIG_TRACER_MAX_TRACE + + if (!tr->allocated_snapshot) + goto out_max; + + ret = 
ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order); + if (ret) { + /* Put back the old order */ + cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order); + if (WARN_ON_ONCE(cnt)) { + /* + * AARGH! We are left with different orders! + * The max buffer is our "snapshot" buffer. + * When a tracer needs a snapshot (one of the + * latency tracers), it swaps the max buffer + * with the saved snap shot. We succeeded to + * update the order of the main buffer, but failed to + * update the order of the max buffer. But when we tried + * to reset the main buffer to the original size, we + * failed there too. This is very unlikely to + * happen, but if it does, warn and kill all + * tracing. + */ + tracing_disabled = 1; + } + goto out; + } + out_max: +#endif + (*ppos)++; + out: + if (ret) + cnt = ret; + tracing_start_tr(tr); + return cnt; +} + +static const struct file_operations buffer_subbuf_size_fops = { + .open = tracing_open_generic_tr, + .read = buffer_subbuf_size_read, + .write = buffer_subbuf_size_write, + .release = tracing_release_generic_tr, + .llseek = default_llseek, +}; + static struct dentry *trace_instance_dir; static void @@ -9504,7 +9644,8 @@ static int trace_array_create_dir(struct trace_array *tr) return ret; } -static struct trace_array *trace_array_create(const char *name) +static struct trace_array * +trace_array_create_systems(const char *name, const char *systems) { struct trace_array *tr; int ret; @@ -9524,6 +9665,12 @@ static struct trace_array *trace_array_create(const char *name) if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL)) goto out_free_tr; + if (systems) { + tr->system_names = kstrdup_const(systems, GFP_KERNEL); + if (!tr->system_names) + goto out_free_tr; + } + tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS; cpumask_copy(tr->tracing_cpumask, cpu_all_mask); @@ -9570,12 +9717,18 @@ static struct trace_array *trace_array_create(const char *name) free_trace_buffers(tr); 
free_cpumask_var(tr->pipe_cpumask); free_cpumask_var(tr->tracing_cpumask); + kfree_const(tr->system_names); kfree(tr->name); kfree(tr); return ERR_PTR(ret); } +static struct trace_array *trace_array_create(const char *name) +{ + return trace_array_create_systems(name, NULL); +} + static int instance_mkdir(const char *name) { struct trace_array *tr; @@ -9601,6 +9754,7 @@ out_unlock: /** * trace_array_get_by_name - Create/Lookup a trace array, given its name. * @name: The name of the trace array to be looked up/created. + * @systems: A list of systems to create event directories for (NULL for all) * * Returns pointer to trace array with given name. * NULL, if it cannot be created. @@ -9614,7 +9768,7 @@ out_unlock: * trace_array_put() is called, user space can not delete it. * */ -struct trace_array *trace_array_get_by_name(const char *name) +struct trace_array *trace_array_get_by_name(const char *name, const char *systems) { struct trace_array *tr; @@ -9626,7 +9780,7 @@ struct trace_array *trace_array_get_by_name(const char *name) goto out_unlock; } - tr = trace_array_create(name); + tr = trace_array_create_systems(name, systems); if (IS_ERR(tr)) tr = NULL; @@ -9673,6 +9827,7 @@ static int __remove_instance(struct trace_array *tr) free_cpumask_var(tr->pipe_cpumask); free_cpumask_var(tr->tracing_cpumask); + kfree_const(tr->system_names); kfree(tr->name); kfree(tr); @@ -9805,6 +9960,9 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer, tr, &buffer_percent_fops); + trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer, + tr, &buffer_subbuf_size_fops); + create_trace_options_dir(tr); #ifdef CONFIG_TRACER_MAX_TRACE @@ -10391,7 +10549,7 @@ __init static void enable_instances(void) if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE)) do_allocate_snapshot(tok); - tr = trace_array_get_by_name(tok); + tr = trace_array_get_by_name(tok, NULL); if (!tr) { pr_warn("Failed to create instance 
buffer %s\n", curr_str); continue; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 0489e72c8169c19754159efa623057ae72a0c5db..00f873910c5d9c835b335ba2731c1062c7427057 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -377,6 +377,7 @@ struct trace_array { unsigned char trace_flags_index[TRACE_FLAGS_MAX_SIZE]; unsigned int flags; raw_spinlock_t start_lock; + const char *system_names; struct list_head err_log; struct dentry *dir; struct dentry *options; @@ -615,6 +616,7 @@ void tracing_reset_all_online_cpus(void); void tracing_reset_all_online_cpus_unlocked(void); int tracing_open_generic(struct inode *inode, struct file *filp); int tracing_open_generic_tr(struct inode *inode, struct file *filp); +int tracing_release_generic_tr(struct inode *inode, struct file *file); int tracing_open_file_tr(struct inode *inode, struct file *filp); int tracing_release_file_tr(struct inode *inode, struct file *filp); int tracing_single_release_file_tr(struct inode *inode, struct file *filp); diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index 7ccc7a8e155b9e5572eb8457f300fe411d46537b..dbe29b4c6a7a07423b6baa94e243d170c4049108 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -633,7 +633,7 @@ trace_boot_init_instances(struct xbc_node *node) if (!p || *p == '\0') continue; - tr = trace_array_get_by_name(p); + tr = trace_array_get_by_name(p, NULL); if (!tr) { pr_err("Failed to get trace instance %s\n", p); continue; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index f29e815ca5b2e9bb7146f24817cc5c5b9aa78009..7c364b87352eed92e0f76137091882231f187028 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1893,9 +1893,9 @@ subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, } static ssize_t -show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) +show_header_page_file(struct file *filp, char __user *ubuf, size_t cnt, loff_t 
*ppos) { - int (*func)(struct trace_seq *s) = filp->private_data; + struct trace_array *tr = filp->private_data; struct trace_seq *s; int r; @@ -1908,7 +1908,31 @@ show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) trace_seq_init(s); - func(s); + ring_buffer_print_page_header(tr->array_buffer.buffer, s); + r = simple_read_from_buffer(ubuf, cnt, ppos, + s->buffer, trace_seq_used(s)); + + kfree(s); + + return r; +} + +static ssize_t +show_header_event_file(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) +{ + struct trace_seq *s; + int r; + + if (*ppos) + return 0; + + s = kmalloc(sizeof(*s), GFP_KERNEL); + if (!s) + return -ENOMEM; + + trace_seq_init(s); + + ring_buffer_print_entry_header(s); r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, trace_seq_used(s)); @@ -2165,10 +2189,18 @@ static const struct file_operations ftrace_tr_enable_fops = { .release = subsystem_release, }; -static const struct file_operations ftrace_show_header_fops = { - .open = tracing_open_generic, - .read = show_header, +static const struct file_operations ftrace_show_header_page_fops = { + .open = tracing_open_generic_tr, + .read = show_header_page_file, + .llseek = default_llseek, + .release = tracing_release_generic_tr, +}; + +static const struct file_operations ftrace_show_header_event_fops = { + .open = tracing_open_generic_tr, + .read = show_header_event_file, .llseek = default_llseek, + .release = tracing_release_generic_tr, }; static int @@ -2896,6 +2928,27 @@ void trace_event_eval_update(struct trace_eval_map **map, int len) up_write(&trace_event_sem); } +static bool event_in_systems(struct trace_event_call *call, + const char *systems) +{ + const char *system; + const char *p; + + if (!systems) + return true; + + system = call->class->system; + p = strstr(systems, system); + if (!p) + return false; + + if (p != systems && !isspace(*(p - 1)) && *(p - 1) != ',') + return false; + + p += strlen(system); + return !*p || isspace(*p) || *p 
== ','; +} + static struct trace_event_file * trace_create_new_event(struct trace_event_call *call, struct trace_array *tr) @@ -2905,9 +2958,12 @@ trace_create_new_event(struct trace_event_call *call, struct trace_event_file *file; unsigned int first; + if (!event_in_systems(call, tr->system_names)) + return NULL; + file = kmem_cache_alloc(file_cachep, GFP_TRACE); if (!file) - return NULL; + return ERR_PTR(-ENOMEM); pid_list = rcu_dereference_protected(tr->filtered_pids, lockdep_is_held(&event_mutex)); @@ -2972,8 +3028,17 @@ __trace_add_new_event(struct trace_event_call *call, struct trace_array *tr) struct trace_event_file *file; file = trace_create_new_event(call, tr); + /* + * trace_create_new_event() returns ERR_PTR(-ENOMEM) if failed + * allocation, or NULL if the event is not part of the tr->system_names. + * When the event is not part of the tr->system_names, return zero, not + * an error. + */ if (!file) - return -ENOMEM; + return 0; + + if (IS_ERR(file)) + return PTR_ERR(file); if (eventdir_initialized) return event_create_dir(tr->event_dir, file); @@ -3012,8 +3077,17 @@ __trace_early_add_new_event(struct trace_event_call *call, int ret; file = trace_create_new_event(call, tr); + /* + * trace_create_new_event() returns ERR_PTR(-ENOMEM) if failed + * allocation, or NULL if the event is not part of the tr->system_names. + * When the event is not part of the tr->system_names, return zero, not + * an error. 
+ */ if (!file) - return -ENOMEM; + return 0; + + if (IS_ERR(file)) + return PTR_ERR(file); ret = event_define_fields(call); if (ret) @@ -3752,17 +3826,16 @@ static int events_callback(const char *name, umode_t *mode, void **data, return 1; } - if (strcmp(name, "header_page") == 0) - *data = ring_buffer_print_page_header; - - else if (strcmp(name, "header_event") == 0) - *data = ring_buffer_print_entry_header; + if (strcmp(name, "header_page") == 0) { + *mode = TRACE_MODE_READ; + *fops = &ftrace_show_header_page_fops; - else + } else if (strcmp(name, "header_event") == 0) { + *mode = TRACE_MODE_READ; + *fops = &ftrace_show_header_event_fops; + } else return 0; - *mode = TRACE_MODE_READ; - *fops = &ftrace_show_header_fops; return 1; } diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 5ecf3c8bde205f360e880b608b11abcddb689a70..6ece1308d36a02dec5af3ca3cebdb9d6b427aac7 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -4805,36 +4805,35 @@ static int parse_actions(struct hist_trigger_data *hist_data) int len; for (i = 0; i < hist_data->attrs->n_actions; i++) { + enum handler_id hid = 0; + char *action_str; + str = hist_data->attrs->action_str[i]; - if ((len = str_has_prefix(str, "onmatch("))) { - char *action_str = str + len; + if ((len = str_has_prefix(str, "onmatch("))) + hid = HANDLER_ONMATCH; + else if ((len = str_has_prefix(str, "onmax("))) + hid = HANDLER_ONMAX; + else if ((len = str_has_prefix(str, "onchange("))) + hid = HANDLER_ONCHANGE; - data = onmatch_parse(tr, action_str); - if (IS_ERR(data)) { - ret = PTR_ERR(data); - break; - } - } else if ((len = str_has_prefix(str, "onmax("))) { - char *action_str = str + len; + action_str = str + len; - data = track_data_parse(hist_data, action_str, - HANDLER_ONMAX); - if (IS_ERR(data)) { - ret = PTR_ERR(data); - break; - } - } else if ((len = str_has_prefix(str, "onchange("))) { - char *action_str = str + len; + switch (hid) { + case 
HANDLER_ONMATCH: + data = onmatch_parse(tr, action_str); + break; + case HANDLER_ONMAX: + case HANDLER_ONCHANGE: + data = track_data_parse(hist_data, action_str, hid); + break; + default: + data = ERR_PTR(-EINVAL); + break; + } - data = track_data_parse(hist_data, action_str, - HANDLER_ONCHANGE); - if (IS_ERR(data)) { - ret = PTR_ERR(data); - break; - } - } else { - ret = -EINVAL; + if (IS_ERR(data)) { + ret = PTR_ERR(data); break; } diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 52f8b537dd0a0872dc1adcbe09778eb76270aad0..c4c6e0e0068be79a966775920903266df86cd8e7 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -487,8 +487,8 @@ static int __register_trace_kprobe(struct trace_kprobe *tk) return -EINVAL; if (within_notrace_func(tk)) { - pr_warn("Could not probe notrace function %s\n", - trace_kprobe_symbol(tk)); + pr_warn("Could not probe notrace function %ps\n", + (void *)trace_kprobe_address(tk)); return -EINVAL; } diff --git a/kernel/trace/trace_seq.c b/kernel/trace/trace_seq.c index 7be97229ddf86008a4fa91c4872f3b9eb85a5181..c158d65a8a886efc64516d7dd8a333aa27da594e 100644 --- a/kernel/trace/trace_seq.c +++ b/kernel/trace/trace_seq.c @@ -13,9 +13,6 @@ * trace_seq_init() more than once to reset the trace_seq to start * from scratch. * - * The buffer size is currently PAGE_SIZE, although it may become dynamic - * in the future. - * * A write to the buffer will either succeed or fail. That is, unlike * sprintf() there will not be a partial write (well it may write into * the buffer but it wont update the pointers). 
This allows users to diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c index c774e560f2f957127c7e41b825164a0d102b6fd0..a4dcf0f2435213bc2b2b91d677ec18290aa53859 100644 --- a/kernel/trace/tracing_map.c +++ b/kernel/trace/tracing_map.c @@ -574,7 +574,12 @@ __tracing_map_insert(struct tracing_map *map, void *key, bool lookup_only) } memcpy(elt->key, key, map->key_size); - entry->val = elt; + /* + * Ensure the initialization is visible and + * publish the elt. + */ + smp_wmb(); + WRITE_ONCE(entry->val, elt); atomic64_inc(&map->hits); return entry->val; diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index ba25129563ad769ec84436af8b79cfbf3f265d97..975a07f9f1cc08838d272f83d5f04a85ff2f5cd2 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -231,9 +231,10 @@ config DEBUG_INFO in the "Debug information" choice below, indicating that debug information will be generated for build targets. -# Clang is known to generate .{s,u}leb128 with symbol deltas with DWARF5, which -# some targets may not support: https://sourceware.org/bugzilla/show_bug.cgi?id=27215 -config AS_HAS_NON_CONST_LEB128 +# Clang generates .uleb128 with label differences for DWARF v5, a feature that +# older binutils ports do not support when utilizing RISC-V style linker +# relaxation: https://sourceware.org/bugzilla/show_bug.cgi?id=27215 +config AS_HAS_NON_CONST_ULEB128 def_bool $(as-instr,.uleb128 .Lexpr_end4 - .Lexpr_start3\n.Lexpr_start3:\n.Lexpr_end4:) choice @@ -258,7 +259,7 @@ config DEBUG_INFO_NONE config DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT bool "Rely on the toolchain's implicit default DWARF version" select DEBUG_INFO - depends on !CC_IS_CLANG || AS_IS_LLVM || CLANG_VERSION < 140000 || (AS_IS_GNU && AS_VERSION >= 23502 && AS_HAS_NON_CONST_LEB128) + depends on !CC_IS_CLANG || AS_IS_LLVM || CLANG_VERSION < 140000 || (AS_IS_GNU && AS_VERSION >= 23502 && AS_HAS_NON_CONST_ULEB128) help The implicit default version of DWARF debug info produced by a toolchain changes over time. 
@@ -282,7 +283,8 @@ config DEBUG_INFO_DWARF4 config DEBUG_INFO_DWARF5 bool "Generate DWARF Version 5 debuginfo" select DEBUG_INFO - depends on !CC_IS_CLANG || AS_IS_LLVM || (AS_IS_GNU && AS_VERSION >= 23502 && AS_HAS_NON_CONST_LEB128) + depends on !ARCH_HAS_BROKEN_DWARF5 + depends on !CC_IS_CLANG || AS_IS_LLVM || (AS_IS_GNU && AS_VERSION >= 23502 && AS_HAS_NON_CONST_ULEB128) help Generate DWARF v5 debug info. Requires binutils 2.35.2, gcc 5.0+ (gcc 5.0+ accepts the -gdwarf-5 flag but only had partial support for some diff --git a/lib/checksum_kunit.c b/lib/checksum_kunit.c index 0eed92b77ba377cf1b838f083f6472af7601ba45..225bb77014600f796e972a9c0f03638c23750a06 100644 --- a/lib/checksum_kunit.c +++ b/lib/checksum_kunit.c @@ -1,15 +1,21 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * Test cases csum_partial and csum_fold + * Test cases csum_partial, csum_fold, ip_fast_csum, csum_ipv6_magic */ #include #include +#include #define MAX_LEN 512 #define MAX_ALIGN 64 #define TEST_BUFLEN (MAX_LEN + MAX_ALIGN) +#define IPv4_MIN_WORDS 5 +#define IPv4_MAX_WORDS 15 +#define NUM_IPv6_TESTS 200 +#define NUM_IP_FAST_CSUM_TESTS 181 + /* Values for a little endian CPU. Byte swap each half on big endian CPU. 
*/ static const u32 random_init_sum = 0x2847aab; static const u8 random_buf[] = { @@ -209,6 +215,237 @@ static const u32 init_sums_no_overflow[] = { 0xffff0000, 0xfffffffb, }; +static const __sum16 expected_csum_ipv6_magic[] = { + 0x18d4, 0x3085, 0x2e4b, 0xd9f4, 0xbdc8, 0x78f, 0x1034, 0x8422, 0x6fc0, + 0xd2f6, 0xbeb5, 0x9d3, 0x7e2a, 0x312e, 0x778e, 0xc1bb, 0x7cf2, 0x9d1e, + 0xca21, 0xf3ff, 0x7569, 0xb02e, 0xca86, 0x7e76, 0x4539, 0x45e3, 0xf28d, + 0xdf81, 0x8fd5, 0x3b5d, 0x8324, 0xf471, 0x83be, 0x1daf, 0x8c46, 0xe682, + 0xd1fb, 0x6b2e, 0xe687, 0x2a33, 0x4833, 0x2d67, 0x660f, 0x2e79, 0xd65e, + 0x6b62, 0x6672, 0x5dbd, 0x8680, 0xbaa5, 0x2229, 0x2125, 0x2d01, 0x1cc0, + 0x6d36, 0x33c0, 0xee36, 0xd832, 0x9820, 0x8a31, 0x53c5, 0x2e2, 0xdb0e, + 0x49ed, 0x17a7, 0x77a0, 0xd72e, 0x3d72, 0x7dc8, 0x5b17, 0xf55d, 0xa4d9, + 0x1446, 0x5d56, 0x6b2e, 0x69a5, 0xadb6, 0xff2a, 0x92e, 0xe044, 0x3402, + 0xbb60, 0xec7f, 0xe7e6, 0x1986, 0x32f4, 0x8f8, 0x5e00, 0x47c6, 0x3059, + 0x3969, 0xe957, 0x4388, 0x2854, 0x3334, 0xea71, 0xa6de, 0x33f9, 0x83fc, + 0x37b4, 0x5531, 0x3404, 0x1010, 0xed30, 0x610a, 0xc95, 0x9aed, 0x6ff, + 0x5136, 0x2741, 0x660e, 0x8b80, 0xf71, 0xa263, 0x88af, 0x7a73, 0x3c37, + 0x1908, 0x6db5, 0x2e92, 0x1cd2, 0x70c8, 0xee16, 0xe80, 0xcd55, 0x6e6, + 0x6434, 0x127, 0x655d, 0x2ea0, 0xb4f4, 0xdc20, 0x5671, 0xe462, 0xe52b, + 0xdb44, 0x3589, 0xc48f, 0xe60b, 0xd2d2, 0x66ad, 0x498, 0x436, 0xb917, + 0xf0ca, 0x1a6e, 0x1cb7, 0xbf61, 0x2870, 0xc7e8, 0x5b30, 0xe4a5, 0x168, + 0xadfc, 0xd035, 0xe690, 0xe283, 0xfb27, 0xe4ad, 0xb1a5, 0xf2d5, 0xc4b6, + 0x8a30, 0xd7d5, 0x7df9, 0x91d5, 0x63ed, 0x2d21, 0x312b, 0xab19, 0xa632, + 0x8d2e, 0xef06, 0x57b9, 0xc373, 0xbd1f, 0xa41f, 0x8444, 0x9975, 0x90cb, + 0xc49c, 0xe965, 0x4eff, 0x5a, 0xef6d, 0xe81a, 0xe260, 0x853a, 0xff7a, + 0x99aa, 0xb06b, 0xee19, 0xcc2c, 0xf34c, 0x7c49, 0xdac3, 0xa71e, 0xc988, + 0x3845, 0x1014 +}; + +static const __sum16 expected_fast_csum[] = { + 0xda83, 0x45da, 0x4f46, 0x4e4f, 0x34e, 0xe902, 0xa5e9, 0x87a5, 0x7187, + 0x5671, 
0xf556, 0x6df5, 0x816d, 0x8f81, 0xbb8f, 0xfbba, 0x5afb, 0xbe5a, + 0xedbe, 0xabee, 0x6aac, 0xe6b, 0xea0d, 0x67ea, 0x7e68, 0x8a7e, 0x6f8a, + 0x3a70, 0x9f3a, 0xe89e, 0x75e8, 0x7976, 0xfa79, 0x2cfa, 0x3c2c, 0x463c, + 0x7146, 0x7a71, 0x547a, 0xfd53, 0x99fc, 0xb699, 0x92b6, 0xdb91, 0xe8da, + 0x5fe9, 0x1e60, 0xae1d, 0x39ae, 0xf439, 0xa1f4, 0xdda1, 0xede, 0x790f, + 0x579, 0x1206, 0x9012, 0x2490, 0xd224, 0x5cd2, 0xa65d, 0xca7, 0x220d, + 0xf922, 0xbf9, 0x920b, 0x1b92, 0x361c, 0x2e36, 0x4d2e, 0x24d, 0x2, + 0xcfff, 0x90cf, 0xa591, 0x93a5, 0x7993, 0x9579, 0xc894, 0x50c8, 0x5f50, + 0xd55e, 0xcad5, 0xf3c9, 0x8f4, 0x4409, 0x5043, 0x5b50, 0x55b, 0x2205, + 0x1e22, 0x801e, 0x3780, 0xe137, 0x7ee0, 0xf67d, 0x3cf6, 0xa53c, 0x2ea5, + 0x472e, 0x5147, 0xcf51, 0x1bcf, 0x951c, 0x1e95, 0xc71e, 0xe4c7, 0xc3e4, + 0x3dc3, 0xee3d, 0xa4ed, 0xf9a4, 0xcbf8, 0x75cb, 0xb375, 0x50b4, 0x3551, + 0xf835, 0x19f8, 0x8c1a, 0x538c, 0xad52, 0xa3ac, 0xb0a3, 0x5cb0, 0x6c5c, + 0x5b6c, 0xc05a, 0x92c0, 0x4792, 0xbe47, 0x53be, 0x1554, 0x5715, 0x4b57, + 0xe54a, 0x20e5, 0x21, 0xd500, 0xa1d4, 0xa8a1, 0x57a9, 0xca57, 0x5ca, + 0x1c06, 0x4f1c, 0xe24e, 0xd9e2, 0xf0d9, 0x4af1, 0x474b, 0x8146, 0xe81, + 0xfd0e, 0x84fd, 0x7c85, 0xba7c, 0x17ba, 0x4a17, 0x964a, 0xf595, 0xff5, + 0x5310, 0x3253, 0x6432, 0x4263, 0x2242, 0xe121, 0x32e1, 0xf632, 0xc5f5, + 0x21c6, 0x7d22, 0x8e7c, 0x418e, 0x5641, 0x3156, 0x7c31, 0x737c, 0x373, + 0x2503, 0xc22a, 0x3c2, 0x4a04, 0x8549, 0x5285, 0xa352, 0xe8a3, 0x6fe8, + 0x1a6f, 0x211a, 0xe021, 0x38e0, 0x7638, 0xf575, 0x9df5, 0x169e, 0xf116, + 0x23f1, 0xcd23, 0xece, 0x660f, 0x4866, 0x6a48, 0x716a, 0xee71, 0xa2ee, + 0xb8a2, 0x61b9, 0xa361, 0xf7a2, 0x26f7, 0x1127, 0x6611, 0xe065, 0x36e0, + 0x1837, 0x3018, 0x1c30, 0x721b, 0x3e71, 0xe43d, 0x99e4, 0x9e9a, 0xb79d, + 0xa9b7, 0xcaa, 0xeb0c, 0x4eb, 0x1305, 0x8813, 0xb687, 0xa9b6, 0xfba9, + 0xd7fb, 0xccd8, 0x2ecd, 0x652f, 0xae65, 0x3fae, 0x3a40, 0x563a, 0x7556, + 0x2776, 0x1228, 0xef12, 0xf9ee, 0xcef9, 0x56cf, 0xa956, 0x24a9, 0xba24, + 0x5fba, 0x665f, 0xf465, 
0x8ff4, 0x6d8f, 0x346d, 0x5f34, 0x385f, 0xd137, + 0xb8d0, 0xacb8, 0x55ac, 0x7455, 0xe874, 0x89e8, 0xd189, 0xa0d1, 0xb2a0, + 0xb8b2, 0x36b8, 0x5636, 0xd355, 0x8d3, 0x1908, 0x2118, 0xc21, 0x990c, + 0x8b99, 0x158c, 0x7815, 0x9e78, 0x6f9e, 0x4470, 0x1d44, 0x341d, 0x2634, + 0x3f26, 0x793e, 0xc79, 0xcc0b, 0x26cc, 0xd126, 0x1fd1, 0xb41f, 0xb6b4, + 0x22b7, 0xa122, 0xa1, 0x7f01, 0x837e, 0x3b83, 0xaf3b, 0x6fae, 0x916f, + 0xb490, 0xffb3, 0xceff, 0x50cf, 0x7550, 0x7275, 0x1272, 0x2613, 0xaa26, + 0xd5aa, 0x7d5, 0x9607, 0x96, 0xb100, 0xf8b0, 0x4bf8, 0xdd4c, 0xeddd, + 0x98ed, 0x2599, 0x9325, 0xeb92, 0x8feb, 0xcc8f, 0x2acd, 0x392b, 0x3b39, + 0xcb3b, 0x6acb, 0xd46a, 0xb8d4, 0x6ab8, 0x106a, 0x2f10, 0x892f, 0x789, + 0xc806, 0x45c8, 0x7445, 0x3c74, 0x3a3c, 0xcf39, 0xd7ce, 0x58d8, 0x6e58, + 0x336e, 0x1034, 0xee10, 0xe9ed, 0xc2e9, 0x3fc2, 0xd53e, 0xd2d4, 0xead2, + 0x8fea, 0x2190, 0x1162, 0xbe11, 0x8cbe, 0x6d8c, 0xfb6c, 0x6dfb, 0xd36e, + 0x3ad3, 0xf3a, 0x870e, 0xc287, 0x53c3, 0xc54, 0x5b0c, 0x7d5a, 0x797d, + 0xec79, 0x5dec, 0x4d5e, 0x184e, 0xd618, 0x60d6, 0xb360, 0x98b3, 0xf298, + 0xb1f2, 0x69b1, 0xf969, 0xef9, 0xab0e, 0x21ab, 0xe321, 0x24e3, 0x8224, + 0x5481, 0x5954, 0x7a59, 0xff7a, 0x7dff, 0x1a7d, 0xa51a, 0x46a5, 0x6b47, + 0xe6b, 0x830e, 0xa083, 0xff9f, 0xd0ff, 0xffd0, 0xe6ff, 0x7de7, 0xc67d, + 0xd0c6, 0x61d1, 0x3a62, 0xc3b, 0x150c, 0x1715, 0x4517, 0x5345, 0x3954, + 0xdd39, 0xdadd, 0x32db, 0x6a33, 0xd169, 0x86d1, 0xb687, 0x3fb6, 0x883f, + 0xa487, 0x39a4, 0x2139, 0xbe20, 0xffbe, 0xedfe, 0x8ded, 0x368e, 0xc335, + 0x51c3, 0x9851, 0xf297, 0xd6f2, 0xb9d6, 0x95ba, 0x2096, 0xea1f, 0x76e9, + 0x4e76, 0xe04d, 0xd0df, 0x80d0, 0xa280, 0xfca2, 0x75fc, 0xef75, 0x32ef, + 0x6833, 0xdf68, 0xc4df, 0x76c4, 0xb77, 0xb10a, 0xbfb1, 0x58bf, 0x5258, + 0x4d52, 0x6c4d, 0x7e6c, 0xb67e, 0xccb5, 0x8ccc, 0xbe8c, 0xc8bd, 0x9ac8, + 0xa99b, 0x52a9, 0x2f53, 0xc30, 0x3e0c, 0xb83d, 0x83b7, 0x5383, 0x7e53, + 0x4f7e, 0xe24e, 0xb3e1, 0x8db3, 0x618e, 0xc861, 0xfcc8, 0x34fc, 0x9b35, + 0xaa9b, 0xb1aa, 0x5eb1, 0x395e, 0x8639, 
0xd486, 0x8bd4, 0x558b, 0x2156, + 0xf721, 0x4ef6, 0x14f, 0x7301, 0xdd72, 0x49de, 0x894a, 0x9889, 0x8898, + 0x7788, 0x7b77, 0x637b, 0xb963, 0xabb9, 0x7cab, 0xc87b, 0x21c8, 0xcb21, + 0xdfca, 0xbfdf, 0xf2bf, 0x6af2, 0x626b, 0xb261, 0x3cb2, 0xc63c, 0xc9c6, + 0xc9c9, 0xb4c9, 0xf9b4, 0x91f9, 0x4091, 0x3a40, 0xcc39, 0xd1cb, 0x7ed1, + 0x537f, 0x6753, 0xa167, 0xba49, 0x88ba, 0x7789, 0x3877, 0xf037, 0xd3ef, + 0xb5d4, 0x55b6, 0xa555, 0xeca4, 0xa1ec, 0xb6a2, 0x7b7, 0x9507, 0xfd94, + 0x82fd, 0x5c83, 0x765c, 0x9676, 0x3f97, 0xda3f, 0x6fda, 0x646f, 0x3064, + 0x5e30, 0x655e, 0x6465, 0xcb64, 0xcdca, 0x4ccd, 0x3f4c, 0x243f, 0x6f24, + 0x656f, 0x6065, 0x3560, 0x3b36, 0xac3b, 0x4aac, 0x714a, 0x7e71, 0xda7e, + 0x7fda, 0xda7f, 0x6fda, 0xff6f, 0xc6ff, 0xedc6, 0xd4ed, 0x70d5, 0xeb70, + 0xa3eb, 0x80a3, 0xca80, 0x3fcb, 0x2540, 0xf825, 0x7ef8, 0xf87e, 0x73f8, + 0xb474, 0xb4b4, 0x92b5, 0x9293, 0x93, 0x3500, 0x7134, 0x9071, 0xfa8f, + 0x51fa, 0x1452, 0xba13, 0x7ab9, 0x957a, 0x8a95, 0x6e8a, 0x6d6e, 0x7c6d, + 0x447c, 0x9744, 0x4597, 0x8945, 0xef88, 0x8fee, 0x3190, 0x4831, 0x8447, + 0xa183, 0x1da1, 0xd41d, 0x2dd4, 0x4f2e, 0xc94e, 0xcbc9, 0xc9cb, 0x9ec9, + 0x319e, 0xd531, 0x20d5, 0x4021, 0xb23f, 0x29b2, 0xd828, 0xecd8, 0x5ded, + 0xfc5d, 0x4dfc, 0xd24d, 0x6bd2, 0x5f6b, 0xb35e, 0x7fb3, 0xee7e, 0x56ee, + 0xa657, 0x68a6, 0x8768, 0x7787, 0xb077, 0x4cb1, 0x764c, 0xb175, 0x7b1, + 0x3d07, 0x603d, 0x3560, 0x3e35, 0xb03d, 0xd6b0, 0xc8d6, 0xd8c8, 0x8bd8, + 0x3e8c, 0x303f, 0xd530, 0xf1d4, 0x42f1, 0xca42, 0xddca, 0x41dd, 0x3141, + 0x132, 0xe901, 0x8e9, 0xbe09, 0xe0bd, 0x2ce0, 0x862d, 0x3986, 0x9139, + 0x6d91, 0x6a6d, 0x8d6a, 0x1b8d, 0xac1b, 0xedab, 0x54ed, 0xc054, 0xcebf, + 0xc1ce, 0x5c2, 0x3805, 0x6038, 0x5960, 0xd359, 0xdd3, 0xbe0d, 0xafbd, + 0x6daf, 0x206d, 0x2c20, 0x862c, 0x8e86, 0xec8d, 0xa2ec, 0xa3a2, 0x51a3, + 0x8051, 0xfd7f, 0x91fd, 0xa292, 0xaf14, 0xeeae, 0x59ef, 0x535a, 0x8653, + 0x3986, 0x9539, 0xb895, 0xa0b8, 0x26a0, 0x2227, 0xc022, 0x77c0, 0xad77, + 0x46ad, 0xaa46, 0x60aa, 0x8560, 0x4785, 0xd747, 
0x45d7, 0x2346, 0x5f23, + 0x25f, 0x1d02, 0x71d, 0x8206, 0xc82, 0x180c, 0x3018, 0x4b30, 0x4b, + 0x3001, 0x1230, 0x2d12, 0x8c2d, 0x148d, 0x4015, 0x5f3f, 0x3d5f, 0x6b3d, + 0x396b, 0x473a, 0xf746, 0x44f7, 0x8945, 0x3489, 0xcb34, 0x84ca, 0xd984, + 0xf0d9, 0xbcf0, 0x63bd, 0x3264, 0xf332, 0x45f3, 0x7346, 0x5673, 0xb056, + 0xd3b0, 0x4ad4, 0x184b, 0x7d18, 0x6c7d, 0xbb6c, 0xfeba, 0xe0fe, 0x10e1, + 0x5410, 0x2954, 0x9f28, 0x3a9f, 0x5a3a, 0xdb59, 0xbdc, 0xb40b, 0x1ab4, + 0x131b, 0x5d12, 0x6d5c, 0xe16c, 0xb0e0, 0x89b0, 0xba88, 0xbb, 0x3c01, + 0xe13b, 0x6fe1, 0x446f, 0xa344, 0x81a3, 0xfe81, 0xc7fd, 0x38c8, 0xb38, + 0x1a0b, 0x6d19, 0xf36c, 0x47f3, 0x6d48, 0xb76d, 0xd3b7, 0xd8d2, 0x52d9, + 0x4b53, 0xa54a, 0x34a5, 0xc534, 0x9bc4, 0xed9b, 0xbeed, 0x3ebe, 0x233e, + 0x9f22, 0x4a9f, 0x774b, 0x4577, 0xa545, 0x64a5, 0xb65, 0x870b, 0x487, + 0x9204, 0x5f91, 0xd55f, 0x35d5, 0x1a35, 0x71a, 0x7a07, 0x4e7a, 0xfc4e, + 0x1efc, 0x481f, 0x7448, 0xde74, 0xa7dd, 0x1ea7, 0xaa1e, 0xcfaa, 0xfbcf, + 0xedfb, 0x6eee, 0x386f, 0x4538, 0x6e45, 0xd96d, 0x11d9, 0x7912, 0x4b79, + 0x494b, 0x6049, 0xac5f, 0x65ac, 0x1366, 0x5913, 0xe458, 0x7ae4, 0x387a, + 0x3c38, 0xb03c, 0x76b0, 0x9376, 0xe193, 0x42e1, 0x7742, 0x6476, 0x3564, + 0x3c35, 0x6a3c, 0xcc69, 0x94cc, 0x5d95, 0xe5e, 0xee0d, 0x4ced, 0xce4c, + 0x52ce, 0xaa52, 0xdaaa, 0xe4da, 0x1de5, 0x4530, 0x5445, 0x3954, 0xb639, + 0x81b6, 0x7381, 0x1574, 0xc215, 0x10c2, 0x3f10, 0x6b3f, 0xe76b, 0x7be7, + 0xbc7b, 0xf7bb, 0x41f7, 0xcc41, 0x38cc, 0x4239, 0xa942, 0x4a9, 0xc504, + 0x7cc4, 0x437c, 0x6743, 0xea67, 0x8dea, 0xe88d, 0xd8e8, 0xdcd8, 0x17dd, + 0x5718, 0x958, 0xa609, 0x41a5, 0x5842, 0x159, 0x9f01, 0x269f, 0x5a26, + 0x405a, 0xc340, 0xb4c3, 0xd4b4, 0xf4d3, 0xf1f4, 0x39f2, 0xe439, 0x67e4, + 0x4168, 0xa441, 0xdda3, 0xdedd, 0x9df, 0xab0a, 0xa5ab, 0x9a6, 0xba09, + 0x9ab9, 0xad9a, 0x5ae, 0xe205, 0xece2, 0xecec, 0x14ed, 0xd614, 0x6bd5, + 0x916c, 0x3391, 0x6f33, 0x206f, 0x8020, 0x780, 0x7207, 0x2472, 0x8a23, + 0xb689, 0x3ab6, 0xf739, 0x97f6, 0xb097, 0xa4b0, 0xe6a4, 0x88e6, 
0x2789, + 0xb28, 0x350b, 0x1f35, 0x431e, 0x1043, 0xc30f, 0x79c3, 0x379, 0x5703, + 0x3256, 0x4732, 0x7247, 0x9d72, 0x489d, 0xd348, 0xa4d3, 0x7ca4, 0xbf7b, + 0x45c0, 0x7b45, 0x337b, 0x4034, 0x843f, 0xd083, 0x35d0, 0x6335, 0x4d63, + 0xe14c, 0xcce0, 0xfecc, 0x35ff, 0x5636, 0xf856, 0xeef8, 0x2def, 0xfc2d, + 0x4fc, 0x6e04, 0xb66d, 0x78b6, 0xbb78, 0x3dbb, 0x9a3d, 0x839a, 0x9283, + 0x593, 0xd504, 0x23d5, 0x5424, 0xd054, 0x61d0, 0xdb61, 0x17db, 0x1f18, + 0x381f, 0x9e37, 0x679e, 0x1d68, 0x381d, 0x8038, 0x917f, 0x491, 0xbb04, + 0x23bb, 0x4124, 0xd41, 0xa30c, 0x8ba3, 0x8b8b, 0xc68b, 0xd2c6, 0xebd2, + 0x93eb, 0xbd93, 0x99bd, 0x1a99, 0xea19, 0x58ea, 0xcf58, 0x73cf, 0x1073, + 0x9e10, 0x139e, 0xea13, 0xcde9, 0x3ecd, 0x883f, 0xf89, 0x180f, 0x2a18, + 0x212a, 0xce20, 0x73ce, 0xf373, 0x60f3, 0xad60, 0x4093, 0x8e40, 0xb98e, + 0xbfb9, 0xf1bf, 0x8bf1, 0x5e8c, 0xe95e, 0x14e9, 0x4e14, 0x1c4e, 0x7f1c, + 0xe77e, 0x6fe7, 0xf26f, 0x13f2, 0x8b13, 0xda8a, 0x5fda, 0xea5f, 0x4eea, + 0xa84f, 0x88a8, 0x1f88, 0x2820, 0x9728, 0x5a97, 0x3f5b, 0xb23f, 0x70b2, + 0x2c70, 0x232d, 0xf623, 0x4f6, 0x905, 0x7509, 0xd675, 0x28d7, 0x9428, + 0x3794, 0xf036, 0x2bf0, 0xba2c, 0xedb9, 0xd7ed, 0x59d8, 0xed59, 0x4ed, + 0xe304, 0x18e3, 0x5c19, 0x3d5c, 0x753d, 0x6d75, 0x956d, 0x7f95, 0xc47f, + 0x83c4, 0xa84, 0x2e0a, 0x5f2e, 0xb95f, 0x77b9, 0x6d78, 0xf46d, 0x1bf4, + 0xed1b, 0xd6ed, 0xe0d6, 0x5e1, 0x3905, 0x5638, 0xa355, 0x99a2, 0xbe99, + 0xb4bd, 0x85b4, 0x2e86, 0x542e, 0x6654, 0xd765, 0x73d7, 0x3a74, 0x383a, + 0x2638, 0x7826, 0x7677, 0x9a76, 0x7e99, 0x2e7e, 0xea2d, 0xa6ea, 0x8a7, + 0x109, 0x3300, 0xad32, 0x5fad, 0x465f, 0x2f46, 0xc62f, 0xd4c5, 0xad5, + 0xcb0a, 0x4cb, 0xb004, 0x7baf, 0xe47b, 0x92e4, 0x8e92, 0x638e, 0x1763, + 0xc17, 0xf20b, 0x1ff2, 0x8920, 0x5889, 0xcb58, 0xf8cb, 0xcaf8, 0x84cb, + 0x9f84, 0x8a9f, 0x918a, 0x4991, 0x8249, 0xff81, 0x46ff, 0x5046, 0x5f50, + 0x725f, 0xf772, 0x8ef7, 0xe08f, 0xc1e0, 0x1fc2, 0x9e1f, 0x8b9d, 0x108b, + 0x411, 0x2b04, 0xb02a, 0x1fb0, 0x1020, 0x7a0f, 0x587a, 0x8958, 0xb188, + 0xb1b1, 
0x49b2, 0xb949, 0x7ab9, 0x917a, 0xfc91, 0xe6fc, 0x47e7, 0xbc47, + 0x8fbb, 0xea8e, 0x34ea, 0x2635, 0x1726, 0x9616, 0xc196, 0xa6c1, 0xf3a6, + 0x11f3, 0x4811, 0x3e48, 0xeb3e, 0xf7ea, 0x1bf8, 0xdb1c, 0x8adb, 0xe18a, + 0x42e1, 0x9d42, 0x5d9c, 0x6e5d, 0x286e, 0x4928, 0x9a49, 0xb09c, 0xa6b0, + 0x2a7, 0xe702, 0xf5e6, 0x9af5, 0xf9b, 0x810f, 0x8080, 0x180, 0x1702, + 0x5117, 0xa650, 0x11a6, 0x1011, 0x550f, 0xd554, 0xbdd5, 0x6bbe, 0xc66b, + 0xfc7, 0x5510, 0x5555, 0x7655, 0x177, 0x2b02, 0x6f2a, 0xb70, 0x9f0b, + 0xcf9e, 0xf3cf, 0x3ff4, 0xcb40, 0x8ecb, 0x768e, 0x5277, 0x8652, 0x9186, + 0x9991, 0x5099, 0xd350, 0x93d3, 0x6d94, 0xe6d, 0x530e, 0x3153, 0xa531, + 0x64a5, 0x7964, 0x7c79, 0x467c, 0x1746, 0x3017, 0x3730, 0x538, 0x5, + 0x1e00, 0x5b1e, 0x955a, 0xae95, 0x3eaf, 0xff3e, 0xf8ff, 0xb2f9, 0xa1b3, + 0xb2a1, 0x5b2, 0xad05, 0x7cac, 0x2d7c, 0xd32c, 0x80d2, 0x7280, 0x8d72, + 0x1b8e, 0x831b, 0xac82, 0xfdac, 0xa7fd, 0x15a8, 0xd614, 0xe0d5, 0x7be0, + 0xb37b, 0x61b3, 0x9661, 0x9d95, 0xc79d, 0x83c7, 0xd883, 0xead7, 0xceb, + 0xf60c, 0xa9f5, 0x19a9, 0xa019, 0x8f9f, 0xd48f, 0x3ad5, 0x853a, 0x985, + 0x5309, 0x6f52, 0x1370, 0x6e13, 0xa96d, 0x98a9, 0x5198, 0x9f51, 0xb69f, + 0xa1b6, 0x2ea1, 0x672e, 0x2067, 0x6520, 0xaf65, 0x6eaf, 0x7e6f, 0xee7e, + 0x17ef, 0xa917, 0xcea8, 0x9ace, 0xff99, 0x5dff, 0xdf5d, 0x38df, 0xa39, + 0x1c0b, 0xe01b, 0x46e0, 0xcb46, 0x90cb, 0xba90, 0x4bb, 0x9104, 0x9d90, + 0xc89c, 0xf6c8, 0x6cf6, 0x886c, 0x1789, 0xbd17, 0x70bc, 0x7e71, 0x17e, + 0x1f01, 0xa01f, 0xbaa0, 0x14bb, 0xfc14, 0x7afb, 0xa07a, 0x3da0, 0xbf3d, + 0x48bf, 0x8c48, 0x968b, 0x9d96, 0xfd9d, 0x96fd, 0x9796, 0x6b97, 0xd16b, + 0xf4d1, 0x3bf4, 0x253c, 0x9125, 0x6691, 0xc166, 0x34c1, 0x5735, 0x1a57, + 0xdc19, 0x77db, 0x8577, 0x4a85, 0x824a, 0x9182, 0x7f91, 0xfd7f, 0xb4c3, + 0xb5b4, 0xb3b5, 0x7eb3, 0x617e, 0x4e61, 0xa4f, 0x530a, 0x3f52, 0xa33e, + 0x34a3, 0x9234, 0xf091, 0xf4f0, 0x1bf5, 0x311b, 0x9631, 0x6a96, 0x386b, + 0x1d39, 0xe91d, 0xe8e9, 0x69e8, 0x426a, 0xee42, 0x89ee, 0x368a, 0x2837, + 0x7428, 0x5974, 0x6159, 
0x1d62, 0x7b1d, 0xf77a, 0x7bf7, 0x6b7c, 0x696c, + 0xf969, 0x4cf9, 0x714c, 0x4e71, 0x6b4e, 0x256c, 0x6e25, 0xe96d, 0x94e9, + 0x8f94, 0x3e8f, 0x343e, 0x4634, 0xb646, 0x97b5, 0x8997, 0xe8a, 0x900e, + 0x8090, 0xfd80, 0xa0fd, 0x16a1, 0xf416, 0xebf4, 0x95ec, 0x1196, 0x8911, + 0x3d89, 0xda3c, 0x9fd9, 0xd79f, 0x4bd7, 0x214c, 0x3021, 0x4f30, 0x994e, + 0x5c99, 0x6f5d, 0x326f, 0xab31, 0x6aab, 0xe969, 0x90e9, 0x1190, 0xff10, + 0xa2fe, 0xe0a2, 0x66e1, 0x4067, 0x9e3f, 0x2d9e, 0x712d, 0x8170, 0xd180, + 0xffd1, 0x25ff, 0x3826, 0x2538, 0x5f24, 0xc45e, 0x1cc4, 0xdf1c, 0x93df, + 0xc793, 0x80c7, 0x2380, 0xd223, 0x7ed2, 0xfc7e, 0x22fd, 0x7422, 0x1474, + 0xb714, 0x7db6, 0x857d, 0xa85, 0xa60a, 0x88a6, 0x4289, 0x7842, 0xc278, + 0xf7c2, 0xcdf7, 0x84cd, 0xae84, 0x8cae, 0xb98c, 0x1aba, 0x4d1a, 0x884c, + 0x4688, 0xcc46, 0xd8cb, 0x2bd9, 0xbe2b, 0xa2be, 0x72a2, 0xf772, 0xd2f6, + 0x75d2, 0xc075, 0xa3c0, 0x63a3, 0xae63, 0x8fae, 0x2a90, 0x5f2a, 0xef5f, + 0x5cef, 0xa05c, 0x89a0, 0x5e89, 0x6b5e, 0x736b, 0x773, 0x9d07, 0xe99c, + 0x27ea, 0x2028, 0xc20, 0x980b, 0x4797, 0x2848, 0x9828, 0xc197, 0x48c2, + 0x2449, 0x7024, 0x570, 0x3e05, 0xd3e, 0xf60c, 0xbbf5, 0x69bb, 0x3f6a, + 0x740, 0xf006, 0xe0ef, 0xbbe0, 0xadbb, 0x56ad, 0xcf56, 0xbfce, 0xa9bf, + 0x205b, 0x6920, 0xae69, 0x50ae, 0x2050, 0xf01f, 0x27f0, 0x9427, 0x8993, + 0x8689, 0x4087, 0x6e40, 0xb16e, 0xa1b1, 0xe8a1, 0x87e8, 0x6f88, 0xfe6f, + 0x4cfe, 0xe94d, 0xd5e9, 0x47d6, 0x3148, 0x5f31, 0xc35f, 0x13c4, 0xa413, + 0x5a5, 0x2405, 0xc223, 0x66c2, 0x3667, 0x5e37, 0x5f5e, 0x2f5f, 0x8c2f, + 0xe48c, 0xd0e4, 0x4d1, 0xd104, 0xe4d0, 0xcee4, 0xfcf, 0x480f, 0xa447, + 0x5ea4, 0xff5e, 0xbefe, 0x8dbe, 0x1d8e, 0x411d, 0x1841, 0x6918, 0x5469, + 0x1155, 0xc611, 0xaac6, 0x37ab, 0x2f37, 0xca2e, 0x87ca, 0xbd87, 0xabbd, + 0xb3ab, 0xcb4, 0xce0c, 0xfccd, 0xa5fd, 0x72a5, 0xf072, 0x83f0, 0xfe83, + 0x97fd, 0xc997, 0xb0c9, 0xadb0, 0xe6ac, 0x88e6, 0x1088, 0xbe10, 0x16be, + 0xa916, 0xa3a8, 0x46a3, 0x5447, 0xe953, 0x84e8, 0x2085, 0xa11f, 0xfa1, + 0xdd0f, 0xbedc, 0x5abe, 0x805a, 
0xc97f, 0x6dc9, 0x826d, 0x4a82, 0x934a, + 0x5293, 0xd852, 0xd3d8, 0xadd3, 0xf4ad, 0xf3f4, 0xfcf3, 0xfefc, 0xcafe, + 0xb7ca, 0x3cb8, 0xa13c, 0x18a1, 0x1418, 0xea13, 0x91ea, 0xf891, 0x53f8, + 0xa254, 0xe9a2, 0x87ea, 0x4188, 0x1c41, 0xdc1b, 0xf5db, 0xcaf5, 0x45ca, + 0x6d45, 0x396d, 0xde39, 0x90dd, 0x1e91, 0x1e, 0x7b00, 0x6a7b, 0xa46a, + 0xc9a3, 0x9bc9, 0x389b, 0x1139, 0x5211, 0x1f52, 0xeb1f, 0xabeb, 0x48ab, + 0x9348, 0xb392, 0x17b3, 0x1618, 0x5b16, 0x175b, 0xdc17, 0xdedb, 0x1cdf, + 0xeb1c, 0xd1ea, 0x4ad2, 0xd4b, 0xc20c, 0x24c2, 0x7b25, 0x137b, 0x8b13, + 0x618b, 0xa061, 0xff9f, 0xfffe, 0x72ff, 0xf572, 0xe2f5, 0xcfe2, 0xd2cf, + 0x75d3, 0x6a76, 0xc469, 0x1ec4, 0xfc1d, 0x59fb, 0x455a, 0x7a45, 0xa479, + 0xb7a4 +}; + static u8 tmp_buf[TEST_BUFLEN]; #define full_csum(buff, len, sum) csum_fold(csum_partial(buff, len, sum)) @@ -338,10 +575,57 @@ static void test_csum_no_carry_inputs(struct kunit *test) } } +static void test_ip_fast_csum(struct kunit *test) +{ + __sum16 csum_result, expected; + + for (int len = IPv4_MIN_WORDS; len < IPv4_MAX_WORDS; len++) { + for (int index = 0; index < NUM_IP_FAST_CSUM_TESTS; index++) { + csum_result = ip_fast_csum(random_buf + index, len); + expected = + expected_fast_csum[(len - IPv4_MIN_WORDS) * + NUM_IP_FAST_CSUM_TESTS + + index]; + CHECK_EQ(expected, csum_result); + } + } +} + +static void test_csum_ipv6_magic(struct kunit *test) +{ +#if defined(CONFIG_NET) + const struct in6_addr *saddr; + const struct in6_addr *daddr; + unsigned int len; + unsigned char proto; + unsigned int csum; + + const int daddr_offset = sizeof(struct in6_addr); + const int len_offset = sizeof(struct in6_addr) + sizeof(struct in6_addr); + const int proto_offset = sizeof(struct in6_addr) + sizeof(struct in6_addr) + + sizeof(int); + const int csum_offset = sizeof(struct in6_addr) + sizeof(struct in6_addr) + + sizeof(int) + sizeof(char); + + for (int i = 0; i < NUM_IPv6_TESTS; i++) { + saddr = (const struct in6_addr *)(random_buf + i); + daddr = (const struct in6_addr 
*)(random_buf + i + + daddr_offset); + len = *(unsigned int *)(random_buf + i + len_offset); + proto = *(random_buf + i + proto_offset); + csum = *(unsigned int *)(random_buf + i + csum_offset); + CHECK_EQ(expected_csum_ipv6_magic[i], + csum_ipv6_magic(saddr, daddr, len, proto, csum)); + } +#endif /* !CONFIG_NET */ +} + static struct kunit_case __refdata checksum_test_cases[] = { KUNIT_CASE(test_csum_fixed_random_inputs), KUNIT_CASE(test_csum_all_carry_inputs), KUNIT_CASE(test_csum_no_carry_inputs), + KUNIT_CASE(test_ip_fast_csum), + KUNIT_CASE(test_csum_ipv6_magic), {} }; diff --git a/lib/fw_table.c b/lib/fw_table.c index c49a09ee3853cda00d60fa73f711d44094abdfb4..c3569d2ba503f4d24282a649bba4d147f358859b 100644 --- a/lib/fw_table.c +++ b/lib/fw_table.c @@ -12,12 +12,14 @@ #include #include #include +#include enum acpi_subtable_type { ACPI_SUBTABLE_COMMON, ACPI_SUBTABLE_HMAT, ACPI_SUBTABLE_PRMT, ACPI_SUBTABLE_CEDT, + CDAT_SUBTABLE, }; struct acpi_subtable_entry { @@ -25,7 +27,7 @@ struct acpi_subtable_entry { enum acpi_subtable_type type; }; -static unsigned long __init_or_acpilib +static unsigned long __init_or_fwtbl_lib acpi_get_entry_type(struct acpi_subtable_entry *entry) { switch (entry->type) { @@ -37,11 +39,13 @@ acpi_get_entry_type(struct acpi_subtable_entry *entry) return 0; case ACPI_SUBTABLE_CEDT: return entry->hdr->cedt.type; + case CDAT_SUBTABLE: + return entry->hdr->cdat.type; } return 0; } -static unsigned long __init_or_acpilib +static unsigned long __init_or_fwtbl_lib acpi_get_entry_length(struct acpi_subtable_entry *entry) { switch (entry->type) { @@ -53,11 +57,16 @@ acpi_get_entry_length(struct acpi_subtable_entry *entry) return entry->hdr->prmt.length; case ACPI_SUBTABLE_CEDT: return entry->hdr->cedt.length; + case CDAT_SUBTABLE: { + __le16 length = (__force __le16)entry->hdr->cdat.length; + + return le16_to_cpu(length); + } } return 0; } -static unsigned long __init_or_acpilib +static unsigned long __init_or_fwtbl_lib 
acpi_get_subtable_header_length(struct acpi_subtable_entry *entry) { switch (entry->type) { @@ -69,11 +78,13 @@ acpi_get_subtable_header_length(struct acpi_subtable_entry *entry) return sizeof(entry->hdr->prmt); case ACPI_SUBTABLE_CEDT: return sizeof(entry->hdr->cedt); + case CDAT_SUBTABLE: + return sizeof(entry->hdr->cdat); } return 0; } -static enum acpi_subtable_type __init_or_acpilib +static enum acpi_subtable_type __init_or_fwtbl_lib acpi_get_subtable_type(char *id) { if (strncmp(id, ACPI_SIG_HMAT, 4) == 0) @@ -82,12 +93,27 @@ acpi_get_subtable_type(char *id) return ACPI_SUBTABLE_PRMT; if (strncmp(id, ACPI_SIG_CEDT, 4) == 0) return ACPI_SUBTABLE_CEDT; + if (strncmp(id, ACPI_SIG_CDAT, 4) == 0) + return CDAT_SUBTABLE; return ACPI_SUBTABLE_COMMON; } -static __init_or_acpilib int call_handler(struct acpi_subtable_proc *proc, - union acpi_subtable_headers *hdr, - unsigned long end) +static unsigned long __init_or_fwtbl_lib +acpi_table_get_length(enum acpi_subtable_type type, + union fw_table_header *header) +{ + if (type == CDAT_SUBTABLE) { + __le32 length = (__force __le32)header->cdat.length; + + return le32_to_cpu(length); + } + + return header->acpi.length; +} + +static __init_or_fwtbl_lib int call_handler(struct acpi_subtable_proc *proc, + union acpi_subtable_headers *hdr, + unsigned long end) { if (proc->handler) return proc->handler(hdr, end); @@ -119,22 +145,25 @@ static __init_or_acpilib int call_handler(struct acpi_subtable_proc *proc, * On success returns sum of all matching entries for all proc handlers. * Otherwise, -ENODEV or -EINVAL is returned. 
*/ -int __init_or_acpilib +int __init_or_fwtbl_lib acpi_parse_entries_array(char *id, unsigned long table_size, - struct acpi_table_header *table_header, + union fw_table_header *table_header, struct acpi_subtable_proc *proc, int proc_num, unsigned int max_entries) { unsigned long table_end, subtable_len, entry_len; struct acpi_subtable_entry entry; + enum acpi_subtable_type type; int count = 0; int i; - table_end = (unsigned long)table_header + table_header->length; + type = acpi_get_subtable_type(id); + table_end = (unsigned long)table_header + + acpi_table_get_length(type, table_header); /* Parse all entries looking for a match. */ - entry.type = acpi_get_subtable_type(id); + entry.type = type; entry.hdr = (union acpi_subtable_headers *) ((unsigned long)table_header + table_size); subtable_len = acpi_get_subtable_header_length(&entry); @@ -174,3 +203,25 @@ acpi_parse_entries_array(char *id, unsigned long table_size, return count; } + +int __init_or_fwtbl_lib +cdat_table_parse(enum acpi_cdat_type type, + acpi_tbl_entry_handler_arg handler_arg, + void *arg, + struct acpi_table_cdat *table_header) +{ + struct acpi_subtable_proc proc = { + .id = type, + .handler_arg = handler_arg, + .arg = arg, + }; + + if (!table_header) + return -EINVAL; + + return acpi_parse_entries_array(ACPI_SIG_CDAT, + sizeof(struct acpi_table_cdat), + (union fw_table_header *)table_header, + &proc, 1, 0); +} +EXPORT_SYMBOL_FWTBL_LIB(cdat_table_parse); diff --git a/lib/nlattr.c b/lib/nlattr.c index dc15e7888fc1fec5747252f3ef1b3d7b5d7d5bd8..ed2ab43e1b22c0156e5d361c6bfa7eb745759232 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -758,7 +758,7 @@ EXPORT_SYMBOL(nla_find); * @dstsize: Size of destination buffer. * * Copies at most dstsize - 1 bytes into the destination buffer. - * Unlike strlcpy the destination buffer is always padded out. + * Unlike strscpy() the destination buffer is always padded out. * * Return: * * srclen - Returns @nla length (not including the trailing %NUL). 
diff --git a/lib/sbitmap.c b/lib/sbitmap.c index d0a5081dfd122e42702748c30fea79100d84727b..92c6b1fd898938e4613d8289cf49c09fd53bb93b 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -388,11 +388,6 @@ static unsigned int sbq_calc_wake_batch(struct sbitmap_queue *sbq, unsigned int shallow_depth; /* - * For each batch, we wake up one queue. We need to make sure that our - * batch size is small enough that the full depth of the bitmap, - * potentially limited by a shallow depth, is enough to wake up all of - * the queues. - * * Each full word of the bitmap has bits_per_word bits, and there might * be a partial word. There are depth / bits_per_word full words and * depth % bits_per_word bits left over. In bitwise arithmetic: diff --git a/lib/string.c b/lib/string.c index be26623953d2e6ef96a41567ed65e5c99787b7fb..6891d15ce991c308f198659e980f9bc9d6522335 100644 --- a/lib/string.c +++ b/lib/string.c @@ -103,21 +103,6 @@ char *strncpy(char *dest, const char *src, size_t count) EXPORT_SYMBOL(strncpy); #endif -#ifndef __HAVE_ARCH_STRLCPY -size_t strlcpy(char *dest, const char *src, size_t size) -{ - size_t ret = strlen(src); - - if (size) { - size_t len = (ret >= size) ? 
size - 1 : ret; - __builtin_memcpy(dest, src, len); - dest[len] = '\0'; - } - return ret; -} -EXPORT_SYMBOL(strlcpy); -#endif - #ifndef __HAVE_ARCH_STRSCPY ssize_t strscpy(char *dest, const char *src, size_t count) { diff --git a/lib/test_fortify/write_overflow-strlcpy-src.c b/lib/test_fortify/write_overflow-strlcpy-src.c deleted file mode 100644 index 91bf83ebd34a535d1dab18db977f747cdd5692c4..0000000000000000000000000000000000000000 --- a/lib/test_fortify/write_overflow-strlcpy-src.c +++ /dev/null @@ -1,5 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -#define TEST \ - strlcpy(small, large_src, sizeof(small) + 1) - -#include "test_fortify.h" diff --git a/lib/test_fortify/write_overflow-strlcpy.c b/lib/test_fortify/write_overflow-strlcpy.c deleted file mode 100644 index 1883db7c0cd67232a13646ab01824f8077b29649..0000000000000000000000000000000000000000 --- a/lib/test_fortify/write_overflow-strlcpy.c +++ /dev/null @@ -1,5 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -#define TEST \ - strlcpy(instance.buf, large_src, sizeof(instance.buf) + 1) - -#include "test_fortify.h" diff --git a/mm/Kconfig b/mm/Kconfig index 1902cfe4cc4f5075fce5ad9e6eb7c380d6087e20..ffc3a2ba3a8cd85e2e6d95606bcab1510ce0d679 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -1258,6 +1258,9 @@ config LOCK_MM_AND_FIND_VMA bool depends on !STACK_GROWSUP +config IOMMU_MM_DATA + bool + source "mm/damon/Kconfig" endmenu diff --git a/mm/filemap.c b/mm/filemap.c index ea49677c63385af4a82981511384f63fc21e7c60..750e779c23db74730fa7743c2307d1b996729d62 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2688,6 +2688,7 @@ int kiocb_write_and_wait(struct kiocb *iocb, size_t count) return filemap_write_and_wait_range(mapping, pos, end); } +EXPORT_SYMBOL_GPL(kiocb_write_and_wait); int kiocb_invalidate_pages(struct kiocb *iocb, size_t count) { @@ -2715,6 +2716,7 @@ int kiocb_invalidate_pages(struct kiocb *iocb, size_t count) return invalidate_inode_pages2_range(mapping, pos >> PAGE_SHIFT, end >> PAGE_SHIFT); } 
+EXPORT_SYMBOL_GPL(kiocb_invalidate_pages); /** * generic_file_read_iter - generic filesystem read routine diff --git a/mm/init-mm.c b/mm/init-mm.c index cfd367822cdd2ebe94181c118c1d14b73cc4b5e8..24c809379274503ac4f261fe7cfdbab3cb1ed1e7 100644 --- a/mm/init-mm.c +++ b/mm/init-mm.c @@ -44,9 +44,6 @@ struct mm_struct init_mm = { #endif .user_ns = &init_user_ns, .cpu_bitmap = CPU_BITS_NONE, -#ifdef CONFIG_IOMMU_SVA - .pasid = IOMMU_PASID_INVALID, -#endif INIT_MM_CONTEXT(init_mm) }; diff --git a/mm/memblock.c b/mm/memblock.c index 1cc77871b658e944dff2b1ce4f1918da486826e1..88cd6417f35b51d7edc61c5b5d9b393a038e2081 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1886,7 +1886,7 @@ int __init_memblock memblock_search_pfn_nid(unsigned long pfn, int mid = memblock_search(type, PFN_PHYS(pfn)); if (mid == -1) - return -1; + return NUMA_NO_NODE; *start_pfn = PFN_DOWN(type->regions[mid].base); *end_pfn = PFN_DOWN(type->regions[mid].base + type->regions[mid].size); diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c index 8d5291add2bce6df6849165b725e73ab04dff557..5462d9e3c84c7d41af4e29f00c16cd00efb0ace3 100644 --- a/mm/memory-tiers.c +++ b/mm/memory-tiers.c @@ -109,7 +109,7 @@ static struct demotion_nodes *node_demotion __read_mostly; static BLOCKING_NOTIFIER_HEAD(mt_adistance_algorithms); static bool default_dram_perf_error; -static struct node_hmem_attrs default_dram_perf; +static struct access_coordinate default_dram_perf; static int default_dram_perf_ref_nid = NUMA_NO_NODE; static const char *default_dram_perf_ref_source; @@ -601,15 +601,15 @@ void clear_node_memory_type(int node, struct memory_dev_type *memtype) } EXPORT_SYMBOL_GPL(clear_node_memory_type); -static void dump_hmem_attrs(struct node_hmem_attrs *attrs, const char *prefix) +static void dump_hmem_attrs(struct access_coordinate *coord, const char *prefix) { pr_info( "%sread_latency: %u, write_latency: %u, read_bandwidth: %u, write_bandwidth: %u\n", - prefix, attrs->read_latency, attrs->write_latency, - 
attrs->read_bandwidth, attrs->write_bandwidth); + prefix, coord->read_latency, coord->write_latency, + coord->read_bandwidth, coord->write_bandwidth); } -int mt_set_default_dram_perf(int nid, struct node_hmem_attrs *perf, +int mt_set_default_dram_perf(int nid, struct access_coordinate *perf, const char *source) { int rc = 0; @@ -666,7 +666,7 @@ out: return rc; } -int mt_perf_to_adistance(struct node_hmem_attrs *perf, int *adist) +int mt_perf_to_adistance(struct access_coordinate *perf, int *adist) { if (default_dram_perf_error) return -EIO; diff --git a/mm/percpu.c b/mm/percpu.c index 6cb2ef197b0e687ed5888e8133fb640d2aed04f9..574c386e0dbf3947e8cd81cb315229220ec7f76b 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -3334,13 +3334,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_cpu_to_node_fn_t if (rc < 0) panic("failed to map percpu area, err=%d\n", rc); - /* - * FIXME: Archs with virtual cache should flush local - * cache for the linear mapping here - something - * equivalent to flush_cache_vmap() on the local cpu. - * flush_cache_vmap() can't be used as most supporting - * data structures are not set up yet. 
- */ + flush_cache_vmap_early(unit_addr, unit_addr + ai->unit_size); /* copy static data */ memcpy((void *)unit_addr, __per_cpu_load, ai->static_size); diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 6adcb45bca75d9426a2952ff84d0fdb9a89b4c33..ed17208907578a231d283c04bd97ce48bebdffaa 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -279,8 +279,17 @@ int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_ if ((READ_ONCE(neigh->nud_state) & NUD_CONNECTED) && READ_ONCE(neigh->hh.hh_len)) { + struct net_device *br_indev; + + br_indev = nf_bridge_get_physindev(skb, net); + if (!br_indev) { + neigh_release(neigh); + goto free_skb; + } + neigh_hh_bridge(&neigh->hh, skb); - skb->dev = nf_bridge->physindev; + skb->dev = br_indev; + ret = br_handle_frame_finish(net, sk, skb); } else { /* the neighbour function below overwrites the complete @@ -352,12 +361,18 @@ br_nf_ipv4_daddr_was_changed(const struct sk_buff *skb, */ static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct net_device *dev = skb->dev; + struct net_device *dev = skb->dev, *br_indev; struct iphdr *iph = ip_hdr(skb); struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); struct rtable *rt; int err; + br_indev = nf_bridge_get_physindev(skb, net); + if (!br_indev) { + kfree_skb(skb); + return 0; + } + nf_bridge->frag_max_size = IPCB(skb)->frag_max_size; if (nf_bridge->pkt_otherhost) { @@ -397,7 +412,7 @@ free_skb: } else { if (skb_dst(skb)->dev == dev) { bridged_dnat: - skb->dev = nf_bridge->physindev; + skb->dev = br_indev; nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); br_nf_hook_thresh(NF_BR_PRE_ROUTING, @@ -410,7 +425,7 @@ bridged_dnat: skb->pkt_type = PACKET_HOST; } } else { - rt = bridge_parent_rtable(nf_bridge->physindev); + rt = bridge_parent_rtable(br_indev); if (!rt) { kfree_skb(skb); return 0; @@ -419,7 +434,7 @@ bridged_dnat: 
skb_dst_set_noref(skb, &rt->dst); } - skb->dev = nf_bridge->physindev; + skb->dev = br_indev; nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); br_nf_hook_thresh(NF_BR_PRE_ROUTING, net, sk, skb, skb->dev, NULL, @@ -456,7 +471,7 @@ struct net_device *setup_pre_routing(struct sk_buff *skb, const struct net *net) } nf_bridge->in_prerouting = 1; - nf_bridge->physindev = skb->dev; + nf_bridge->physinif = skb->dev->ifindex; skb->dev = brnf_get_logical_dev(skb, skb->dev, net); if (skb->protocol == htons(ETH_P_8021Q)) @@ -553,7 +568,11 @@ static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff if (skb->protocol == htons(ETH_P_IPV6)) nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size; - in = nf_bridge->physindev; + in = nf_bridge_get_physindev(skb, net); + if (!in) { + kfree_skb(skb); + return 0; + } if (nf_bridge->pkt_otherhost) { skb->pkt_type = PACKET_OTHERHOST; nf_bridge->pkt_otherhost = false; @@ -899,6 +918,13 @@ static unsigned int ip_sabotage_in(void *priv, static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb) { struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); + struct net_device *br_indev; + + br_indev = nf_bridge_get_physindev(skb, dev_net(skb->dev)); + if (!br_indev) { + kfree_skb(skb); + return; + } skb_pull(skb, ETH_HLEN); nf_bridge->bridged_dnat = 0; @@ -908,7 +934,7 @@ static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb) skb_copy_to_linear_data_offset(skb, -(ETH_HLEN - ETH_ALEN), nf_bridge->neigh_header, ETH_HLEN - ETH_ALEN); - skb->dev = nf_bridge->physindev; + skb->dev = br_indev; nf_bridge->physoutdev = NULL; br_handle_frame_finish(dev_net(skb->dev), NULL, skb); diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index 2e24a743f91731cad6a8791e85af7f7aeadb8352..e0421eaa3abc78b8587d551c6e91682bba28c79d 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -102,9 +102,15 @@ static int 
br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struc { struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); struct rtable *rt; - struct net_device *dev = skb->dev; + struct net_device *dev = skb->dev, *br_indev; const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); + br_indev = nf_bridge_get_physindev(skb, net); + if (!br_indev) { + kfree_skb(skb); + return 0; + } + nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size; if (nf_bridge->pkt_otherhost) { @@ -122,7 +128,7 @@ static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struc } if (skb_dst(skb)->dev == dev) { - skb->dev = nf_bridge->physindev; + skb->dev = br_indev; nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); br_nf_hook_thresh(NF_BR_PRE_ROUTING, @@ -133,7 +139,7 @@ static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struc ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr); skb->pkt_type = PACKET_HOST; } else { - rt = bridge_parent_rtable(nf_bridge->physindev); + rt = bridge_parent_rtable(br_indev); if (!rt) { kfree_skb(skb); return 0; @@ -142,7 +148,7 @@ static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struc skb_dst_set_noref(skb, &rt->dst); } - skb->dev = nf_bridge->physindev; + skb->dev = br_indev; nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); br_nf_hook_thresh(NF_BR_PRE_ROUTING, net, sk, skb, diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index d3a759e052c81f066710a467cc9d9baf3dbf8e20..625622016f5761e36bccc3f7a239e265039ce95d 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -5850,8 +5850,6 @@ static inline void convert_extent_map(struct ceph_sparse_read *sr) } #endif -#define MAX_EXTENTS 4096 - static int osd_sparse_read(struct ceph_connection *con, struct ceph_msg_data_cursor *cursor, char **pbuf) @@ -5882,23 +5880,16 @@ next_op: if (count > 0) { if (!sr->sr_extent || count > sr->sr_ext_len) { - /* - * Apply a hard cap to the number of extents. 
- * If we have more, assume something is wrong. - */ - if (count > MAX_EXTENTS) { - dout("%s: OSD returned 0x%x extents in a single reply!\n", - __func__, count); - return -EREMOTEIO; - } - /* no extent array provided, or too short */ kfree(sr->sr_extent); sr->sr_extent = kmalloc_array(count, sizeof(*sr->sr_extent), GFP_NOIO); - if (!sr->sr_extent) + if (!sr->sr_extent) { + pr_err("%s: failed to allocate %u extents\n", + __func__, count); return -ENOMEM; + } sr->sr_ext_len = count; } ret = count * sizeof(*sr->sr_extent); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 5f6ed6da3cfc0b6e7a660760a9efe242b5478f12..f6f29eb03ec277a1ea17ccc220fa7624bf6db092 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2899,13 +2899,6 @@ static int do_setlink(const struct sk_buff *skb, call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); } - if (tb[IFLA_MASTER]) { - err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]), extack); - if (err) - goto errout; - status |= DO_SETLINK_MODIFIED; - } - if (ifm->ifi_flags || ifm->ifi_change) { err = dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm), extack); @@ -2913,6 +2906,13 @@ static int do_setlink(const struct sk_buff *skb, goto errout; } + if (tb[IFLA_MASTER]) { + err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]), extack); + if (err) + goto errout; + status |= DO_SETLINK_MODIFIED; + } + if (tb[IFLA_CARRIER]) { err = dev_change_carrier(dev, nla_get_u8(tb[IFLA_CARRIER])); if (err) diff --git a/net/dsa/user.c b/net/dsa/user.c index b738a466e2dccb9dce8226a1b3c047ba93414b8c..b15e71cc342c7963c9c88951d5e058314dd15575 100644 --- a/net/dsa/user.c +++ b/net/dsa/user.c @@ -2806,13 +2806,14 @@ EXPORT_SYMBOL_GPL(dsa_user_dev_check); static int dsa_user_changeupper(struct net_device *dev, struct netdev_notifier_changeupper_info *info) { - struct dsa_port *dp = dsa_user_to_port(dev); struct netlink_ext_ack *extack; int err = NOTIFY_DONE; + struct dsa_port *dp; if (!dsa_user_dev_check(dev)) return err; + dp = 
dsa_user_to_port(dev); extack = netdev_notifier_info_to_extack(&info->info); if (netif_is_bridge_master(info->upper_dev)) { @@ -2865,11 +2866,13 @@ static int dsa_user_changeupper(struct net_device *dev, static int dsa_user_prechangeupper(struct net_device *dev, struct netdev_notifier_changeupper_info *info) { - struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_port *dp; if (!dsa_user_dev_check(dev)) return NOTIFY_DONE; + dp = dsa_user_to_port(dev); + if (netif_is_bridge_master(info->upper_dev) && !info->linking) dsa_port_pre_bridge_leave(dp, info->upper_dev); else if (netif_is_lag_master(info->upper_dev) && !info->linking) diff --git a/net/ethtool/features.c b/net/ethtool/features.c index a79af8c25a07121baf0e66f30f9161c9cf7cdee6..b6cb101d7f19ef5038f6f0e10139b1e5ccfa4af5 100644 --- a/net/ethtool/features.c +++ b/net/ethtool/features.c @@ -234,17 +234,20 @@ int ethnl_set_features(struct sk_buff *skb, struct genl_info *info) dev = req_info.dev; rtnl_lock(); + ret = ethnl_ops_begin(dev); + if (ret < 0) + goto out_rtnl; ethnl_features_to_bitmap(old_active, dev->features); ethnl_features_to_bitmap(old_wanted, dev->wanted_features); ret = ethnl_parse_bitset(req_wanted, req_mask, NETDEV_FEATURE_COUNT, tb[ETHTOOL_A_FEATURES_WANTED], netdev_features_strings, info->extack); if (ret < 0) - goto out_rtnl; + goto out_ops; if (ethnl_bitmap_to_features(req_mask) & ~NETIF_F_ETHTOOL_BITS) { GENL_SET_ERR_MSG(info, "attempt to change non-ethtool features"); ret = -EINVAL; - goto out_rtnl; + goto out_ops; } /* set req_wanted bits not in req_mask from old_wanted */ @@ -281,6 +284,8 @@ int ethnl_set_features(struct sk_buff *skb, struct genl_info *info) if (mod) netdev_features_change(dev); +out_ops: + ethnl_ops_complete(dev); out_rtnl: rtnl_unlock(); ethnl_parse_header_dev_put(&req_info); diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c index b099c315015096f3637f4770ee9a6553baac2637..cb83c8feb746535fe4126f4172ffbafaac58b0d5 100644 --- a/net/hsr/hsr_main.c +++ 
b/net/hsr/hsr_main.c @@ -167,4 +167,5 @@ static void __exit hsr_exit(void) module_init(hsr_init); module_exit(hsr_exit); +MODULE_DESCRIPTION("High-availability Seamless Redundancy (HSR) driver"); MODULE_LICENSE("GPL"); diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index f01b038fc1cda0257fb29df9a8832310378bd1fb..04504b2b51df562c2d4be27c16fdc294e442239b 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -239,7 +239,6 @@ static int nf_reject_fill_skb_dst(struct sk_buff *skb_in) void nf_send_reset(struct net *net, struct sock *sk, struct sk_buff *oldskb, int hook) { - struct net_device *br_indev __maybe_unused; struct sk_buff *nskb; struct iphdr *niph; const struct tcphdr *oth; @@ -289,9 +288,13 @@ void nf_send_reset(struct net *net, struct sock *sk, struct sk_buff *oldskb, * build the eth header using the original destination's MAC as the * source, and send the RST packet directly. */ - br_indev = nf_bridge_get_physindev(oldskb); - if (br_indev) { + if (nf_bridge_info_exists(oldskb)) { struct ethhdr *oeth = eth_hdr(oldskb); + struct net_device *br_indev; + + br_indev = nf_bridge_get_physindev(oldskb, net); + if (!br_indev) + goto free_nskb; nskb->dev = br_indev; niph->tot_len = htons(nskb->len); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 89e5a806b82e9c83b583d454e1b58b7838068f04..148ffb007969f57edc4be8ec1c235062ad49b503 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -805,7 +805,7 @@ void udp_flush_pending_frames(struct sock *sk) if (up->pending) { up->len = 0; - up->pending = 0; + WRITE_ONCE(up->pending, 0); ip_flush_pending_frames(sk); } } @@ -993,7 +993,7 @@ int udp_push_pending_frames(struct sock *sk) out: up->len = 0; - up->pending = 0; + WRITE_ONCE(up->pending, 0); return err; } EXPORT_SYMBOL(udp_push_pending_frames); @@ -1070,7 +1070,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) getfrag = is_udplite ? 
udplite_getfrag : ip_generic_getfrag; fl4 = &inet->cork.fl.u.ip4; - if (up->pending) { + if (READ_ONCE(up->pending)) { /* * There are pending frames. * The socket lock must be held while it's corked. @@ -1269,7 +1269,7 @@ back_from_confirm: fl4->saddr = saddr; fl4->fl4_dport = dport; fl4->fl4_sport = inet->inet_sport; - up->pending = AF_INET; + WRITE_ONCE(up->pending, AF_INET); do_append_data: up->len += ulen; @@ -1281,7 +1281,7 @@ do_append_data: else if (!corkreq) err = udp_push_pending_frames(sk); else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) - up->pending = 0; + WRITE_ONCE(up->pending, 0); release_sock(sk); out: @@ -1319,7 +1319,7 @@ void udp_splice_eof(struct socket *sock) struct sock *sk = sock->sk; struct udp_sock *up = udp_sk(sk); - if (!up->pending || udp_test_bit(CORK, sk)) + if (!READ_ONCE(up->pending) || udp_test_bit(CORK, sk)) return; lock_sock(sk); @@ -3137,16 +3137,18 @@ static struct sock *bpf_iter_udp_batch(struct seq_file *seq) struct bpf_udp_iter_state *iter = seq->private; struct udp_iter_state *state = &iter->state; struct net *net = seq_file_net(seq); + int resume_bucket, resume_offset; struct udp_table *udptable; unsigned int batch_sks = 0; bool resized = false; struct sock *sk; + resume_bucket = state->bucket; + resume_offset = iter->offset; + /* The current batch is done, so advance the bucket. */ - if (iter->st_bucket_done) { + if (iter->st_bucket_done) state->bucket++; - iter->offset = 0; - } udptable = udp_get_table_seq(seq, net); @@ -3166,19 +3168,19 @@ again: for (; state->bucket <= udptable->mask; state->bucket++) { struct udp_hslot *hslot2 = &udptable->hash2[state->bucket]; - if (hlist_empty(&hslot2->head)) { - iter->offset = 0; + if (hlist_empty(&hslot2->head)) continue; - } + iter->offset = 0; spin_lock_bh(&hslot2->lock); udp_portaddr_for_each_entry(sk, &hslot2->head) { if (seq_sk_match(seq, sk)) { /* Resume from the last iterated socket at the * offset in the bucket before iterator was stopped. 
*/ - if (iter->offset) { - --iter->offset; + if (state->bucket == resume_bucket && + iter->offset < resume_offset) { + ++iter->offset; continue; } if (iter->end_sk < iter->max_sk) { @@ -3192,9 +3194,6 @@ again: if (iter->end_sk) break; - - /* Reset the current bucket's offset before moving to the next bucket. */ - iter->offset = 0; } /* All done: no batch made. */ @@ -3213,7 +3212,6 @@ again: /* After allocating a larger batch, retry one more time to grab * the whole bucket. */ - state->bucket--; goto again; } done: diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index b75d3c9d41bb5005af2d4e10fab58f157e9ea4fa..bc6e0a0bad3c12d641a1dc60a8c790a6e72b1b5f 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2722,8 +2722,12 @@ void ipv6_mc_down(struct inet6_dev *idev) synchronize_net(); mld_query_stop_work(idev); mld_report_stop_work(idev); + + mutex_lock(&idev->mc_lock); mld_ifc_stop_work(idev); mld_gq_stop_work(idev); + mutex_unlock(&idev->mc_lock); + mld_dad_stop_work(idev); } diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index d45bc54b7ea55d03ffbea6de9ef3db8c098c217f..196dd4ecb5e215f8a1de321bf249bec6fca6b97c 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -278,7 +278,6 @@ static int nf_reject6_fill_skb_dst(struct sk_buff *skb_in) void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb, int hook) { - struct net_device *br_indev __maybe_unused; struct sk_buff *nskb; struct tcphdr _otcph; const struct tcphdr *otcph; @@ -354,9 +353,15 @@ void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb, * build the eth header using the original destination's MAC as the * source, and send the RST packet directly. 
*/ - br_indev = nf_bridge_get_physindev(oldskb); - if (br_indev) { + if (nf_bridge_info_exists(oldskb)) { struct ethhdr *oeth = eth_hdr(oldskb); + struct net_device *br_indev; + + br_indev = nf_bridge_get_physindev(oldskb, net); + if (!br_indev) { + kfree_skb(nskb); + return; + } nskb->dev = br_indev; nskb->protocol = htons(ETH_P_IPV6); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 594e3f23c12909fe6f245bf31057278169cd85c5..3f2249b4cd5f6a594dd9768e29f20f0d9a57faed 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1135,7 +1135,7 @@ static void udp_v6_flush_pending_frames(struct sock *sk) udp_flush_pending_frames(sk); else if (up->pending) { up->len = 0; - up->pending = 0; + WRITE_ONCE(up->pending, 0); ip6_flush_pending_frames(sk); } } @@ -1313,7 +1313,7 @@ static int udp_v6_push_pending_frames(struct sock *sk) &inet_sk(sk)->cork.base); out: up->len = 0; - up->pending = 0; + WRITE_ONCE(up->pending, 0); return err; } @@ -1370,7 +1370,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) default: return -EINVAL; } - } else if (!up->pending) { + } else if (!READ_ONCE(up->pending)) { if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; daddr = &sk->sk_v6_daddr; @@ -1401,8 +1401,8 @@ do_udp_sendmsg: return -EMSGSIZE; getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; - if (up->pending) { - if (up->pending == AF_INET) + if (READ_ONCE(up->pending)) { + if (READ_ONCE(up->pending) == AF_INET) return udp_sendmsg(sk, msg, len); /* * There are pending frames. @@ -1593,7 +1593,7 @@ back_from_confirm: goto out; } - up->pending = AF_INET6; + WRITE_ONCE(up->pending, AF_INET6); do_append_data: if (ipc6.dontfrag < 0) @@ -1607,7 +1607,7 @@ do_append_data: else if (!corkreq) err = udp_v6_push_pending_frames(sk); else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) - up->pending = 0; + WRITE_ONCE(up->pending, 0); if (err > 0) err = inet6_test_bit(RECVERR6, sk) ? 
net_xmit_errno(err) : 0; @@ -1648,7 +1648,7 @@ static void udpv6_splice_eof(struct socket *sock) struct sock *sk = sock->sk; struct udp_sock *up = udp_sk(sk); - if (!up->pending || udp_test_bit(CORK, sk)) + if (!READ_ONCE(up->pending) || udp_test_bit(CORK, sk)) return; lock_sock(sk); diff --git a/net/mptcp/options.c b/net/mptcp/options.c index c53914012d01d38c2dc0a3578bf3651595956e72..d2527d189a799319c068a5b76a5816cc7a905861 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -123,8 +123,8 @@ static void mptcp_parse_option(const struct sk_buff *skb, break; case MPTCPOPT_MP_JOIN: - mp_opt->suboptions |= OPTIONS_MPTCP_MPJ; if (opsize == TCPOLEN_MPTCP_MPJ_SYN) { + mp_opt->suboptions |= OPTION_MPTCP_MPJ_SYN; mp_opt->backup = *ptr++ & MPTCPOPT_BACKUP; mp_opt->join_id = *ptr++; mp_opt->token = get_unaligned_be32(ptr); @@ -135,6 +135,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, mp_opt->backup, mp_opt->join_id, mp_opt->token, mp_opt->nonce); } else if (opsize == TCPOLEN_MPTCP_MPJ_SYNACK) { + mp_opt->suboptions |= OPTION_MPTCP_MPJ_SYNACK; mp_opt->backup = *ptr++ & MPTCPOPT_BACKUP; mp_opt->join_id = *ptr++; mp_opt->thmac = get_unaligned_be64(ptr); @@ -145,11 +146,10 @@ static void mptcp_parse_option(const struct sk_buff *skb, mp_opt->backup, mp_opt->join_id, mp_opt->thmac, mp_opt->nonce); } else if (opsize == TCPOLEN_MPTCP_MPJ_ACK) { + mp_opt->suboptions |= OPTION_MPTCP_MPJ_ACK; ptr += 2; memcpy(mp_opt->hmac, ptr, MPTCPOPT_HMAC_LEN); pr_debug("MP_JOIN hmac"); - } else { - mp_opt->suboptions &= ~OPTIONS_MPTCP_MPJ; } break; diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 3eacd04e7099e6de1a161c176a74959722445286..0dcb721c89d193e8943aa414610fcf4284d51f38 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -157,8 +157,8 @@ static int subflow_check_req(struct request_sock *req, mptcp_get_options(skb, &mp_opt); - opt_mp_capable = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPC); - opt_mp_join = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ); + 
opt_mp_capable = !!(mp_opt.suboptions & OPTION_MPTCP_MPC_SYN); + opt_mp_join = !!(mp_opt.suboptions & OPTION_MPTCP_MPJ_SYN); if (opt_mp_capable) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE); @@ -254,8 +254,8 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req, subflow_init_req(req, sk_listener); mptcp_get_options(skb, &mp_opt); - opt_mp_capable = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPC); - opt_mp_join = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ); + opt_mp_capable = !!(mp_opt.suboptions & OPTION_MPTCP_MPC_ACK); + opt_mp_join = !!(mp_opt.suboptions & OPTION_MPTCP_MPJ_ACK); if (opt_mp_capable && opt_mp_join) return -EINVAL; @@ -486,7 +486,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) mptcp_get_options(skb, &mp_opt); if (subflow->request_mptcp) { - if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPC)) { + if (!(mp_opt.suboptions & OPTION_MPTCP_MPC_SYNACK)) { MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEFALLBACK); mptcp_do_fallback(sk); @@ -506,7 +506,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) } else if (subflow->request_join) { u8 hmac[SHA256_DIGEST_SIZE]; - if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ)) { + if (!(mp_opt.suboptions & OPTION_MPTCP_MPJ_SYNACK)) { subflow->reset_reason = MPTCP_RST_EMPTCP; goto do_reset; } @@ -783,12 +783,13 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, * options. 
*/ mptcp_get_options(skb, &mp_opt); - if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPC)) + if (!(mp_opt.suboptions & + (OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_ACK))) fallback = true; } else if (subflow_req->mp_join) { mptcp_get_options(skb, &mp_opt); - if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ) || + if (!(mp_opt.suboptions & OPTION_MPTCP_MPJ_ACK) || !subflow_hmac_valid(req, &mp_opt) || !mptcp_can_accept_new_subflow(subflow_req->msk)) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC); diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 95aeb31c60e0d74fe1d8d2abc0507966c391e412..30a655e5c4fdcd3424f5d8516c9724ecd100580a 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -138,9 +138,9 @@ hash_netiface4_data_next(struct hash_netiface4_elem *next, #include "ip_set_hash_gen.h" #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) -static const char *get_physindev_name(const struct sk_buff *skb) +static const char *get_physindev_name(const struct sk_buff *skb, struct net *net) { - struct net_device *dev = nf_bridge_get_physindev(skb); + struct net_device *dev = nf_bridge_get_physindev(skb, net); return dev ? dev->name : NULL; } @@ -177,7 +177,7 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb, if (opt->cmdflags & IPSET_FLAG_PHYSDEV) { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - const char *eiface = SRCDIR ? get_physindev_name(skb) : + const char *eiface = SRCDIR ? get_physindev_name(skb, xt_net(par)) : get_physoutdev_name(skb); if (!eiface) @@ -395,7 +395,7 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb, if (opt->cmdflags & IPSET_FLAG_PHYSDEV) { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - const char *eiface = SRCDIR ? get_physindev_name(skb) : + const char *eiface = SRCDIR ? 
get_physindev_name(skb, xt_net(par)) : get_physoutdev_name(skb); if (!eiface) diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 9193e109e6b38f1336bb13e26ffb5399d1b15381..65e0259178da43c61733973e50b69c77da5027c5 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -271,7 +271,7 @@ static inline bool decrement_ttl(struct netns_ipvs *ipvs, skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); - __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); return false; } @@ -286,7 +286,7 @@ static inline bool decrement_ttl(struct netns_ipvs *ipvs, { if (ip_hdr(skb)->ttl <= 1) { /* Tell the sender its packet died... */ - __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS); + IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS); icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0); return false; } diff --git a/net/netfilter/nf_log_syslog.c b/net/netfilter/nf_log_syslog.c index c66689ad2b491977876aa47e6d4201de29244950..58402226045e84b7134b3c8a33919b5ea1f22f12 100644 --- a/net/netfilter/nf_log_syslog.c +++ b/net/netfilter/nf_log_syslog.c @@ -111,7 +111,8 @@ nf_log_dump_packet_common(struct nf_log_buf *m, u8 pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, - const struct nf_loginfo *loginfo, const char *prefix) + const struct nf_loginfo *loginfo, const char *prefix, + struct net *net) { const struct net_device *physoutdev __maybe_unused; const struct net_device *physindev __maybe_unused; @@ -121,7 +122,7 @@ nf_log_dump_packet_common(struct nf_log_buf *m, u8 pf, in ? in->name : "", out ? 
out->name : ""); #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - physindev = nf_bridge_get_physindev(skb); + physindev = nf_bridge_get_physindev(skb, net); if (physindev && in != physindev) nf_log_buf_add(m, "PHYSIN=%s ", physindev->name); physoutdev = nf_bridge_get_physoutdev(skb); @@ -148,7 +149,7 @@ static void nf_log_arp_packet(struct net *net, u_int8_t pf, loginfo = &default_loginfo; nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, loginfo, - prefix); + prefix, net); dump_arp_packet(m, loginfo, skb, skb_network_offset(skb)); nf_log_buf_close(m); @@ -845,7 +846,7 @@ static void nf_log_ip_packet(struct net *net, u_int8_t pf, loginfo = &default_loginfo; nf_log_dump_packet_common(m, pf, hooknum, skb, in, - out, loginfo, prefix); + out, loginfo, prefix, net); if (in) dump_mac_header(m, loginfo, skb); @@ -880,7 +881,7 @@ static void nf_log_ip6_packet(struct net *net, u_int8_t pf, loginfo = &default_loginfo; nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, - loginfo, prefix); + loginfo, prefix, net); if (in) dump_mac_header(m, loginfo, skb); @@ -916,7 +917,7 @@ static void nf_log_unknown_packet(struct net *net, u_int8_t pf, loginfo = &default_loginfo; nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, loginfo, - prefix); + prefix, net); dump_mac_header(m, loginfo, skb); diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 63d1516816b1fdaa570288c725cc5f721cde694d..e2f334f70281f8bb29ed0ea8eb974382708c9de6 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -82,11 +82,9 @@ static void __nf_queue_entry_init_physdevs(struct nf_queue_entry *entry) { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) const struct sk_buff *skb = entry->skb; - struct nf_bridge_info *nf_bridge; - nf_bridge = nf_bridge_info_get(skb); - if (nf_bridge) { - entry->physin = nf_bridge_get_physindev(skb); + if (nf_bridge_info_exists(skb)) { + entry->physin = nf_bridge_get_physindev(skb, entry->state.net); entry->physout = nf_bridge_get_physoutdev(skb); } else { 
entry->physin = NULL; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 8438a8922e4ab9612784cd9d2ed933f218a26bd3..4b55533ce5ca2c29b1648b4f36de3e835c8953a6 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2977,6 +2977,9 @@ static int nf_tables_delchain(struct sk_buff *skb, const struct nfnl_info *info, */ int nft_register_expr(struct nft_expr_type *type) { + if (WARN_ON_ONCE(type->maxattr > NFT_EXPR_MAXATTR)) + return -ENOMEM; + nfnl_lock(NFNL_SUBSYS_NFTABLES); if (type->family == NFPROTO_UNSPEC) list_add_tail_rcu(&type->list, &nf_tables_expressions); @@ -3271,14 +3274,13 @@ int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src) { int err; - if (src->ops->clone) { - dst->ops = src->ops; - err = src->ops->clone(dst, src); - if (err < 0) - return err; - } else { - memcpy(dst, src, src->ops->size); - } + if (WARN_ON_ONCE(!src->ops->clone)) + return -EINVAL; + + dst->ops = src->ops; + err = src->ops->clone(dst, src); + if (err < 0) + return err; __module_get(src->ops->type->owner); @@ -4811,8 +4813,8 @@ static int nft_set_desc_concat_parse(const struct nlattr *attr, static int nft_set_desc_concat(struct nft_set_desc *desc, const struct nlattr *nla) { + u32 num_regs = 0, key_num_regs = 0; struct nlattr *attr; - u32 num_regs = 0; int rem, err, i; nla_for_each_nested(attr, nla, rem) { @@ -4827,6 +4829,10 @@ static int nft_set_desc_concat(struct nft_set_desc *desc, for (i = 0; i < desc->field_count; i++) num_regs += DIV_ROUND_UP(desc->field_len[i], sizeof(u32)); + key_num_regs = DIV_ROUND_UP(desc->klen, sizeof(u32)); + if (key_num_regs != num_regs) + return -EINVAL; + if (num_regs > NFT_REG32_COUNT) return -E2BIG; @@ -5048,16 +5054,28 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, } desc.policy = NFT_SET_POL_PERFORMANCE; - if (nla[NFTA_SET_POLICY] != NULL) + if (nla[NFTA_SET_POLICY] != NULL) { desc.policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY])); + switch 
(desc.policy) { + case NFT_SET_POL_PERFORMANCE: + case NFT_SET_POL_MEMORY: + break; + default: + return -EOPNOTSUPP; + } + } if (nla[NFTA_SET_DESC] != NULL) { err = nf_tables_set_desc_parse(&desc, nla[NFTA_SET_DESC]); if (err < 0) return err; - if (desc.field_count > 1 && !(flags & NFT_SET_CONCAT)) + if (desc.field_count > 1) { + if (!(flags & NFT_SET_CONCAT)) + return -EINVAL; + } else if (flags & NFT_SET_CONCAT) { return -EINVAL; + } } else if (flags & NFT_SET_CONCAT) { return -EINVAL; } @@ -5704,7 +5722,7 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx, const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); struct nft_set_dump_args *args; - if (nft_set_elem_expired(ext)) + if (nft_set_elem_expired(ext) || nft_set_elem_is_dead(ext)) return 0; args = container_of(iter, struct nft_set_dump_args, iter); @@ -6568,7 +6586,7 @@ static int nft_setelem_catchall_deactivate(const struct net *net, list_for_each_entry(catchall, &set->catchall_list, list) { ext = nft_set_elem_ext(set, catchall->elem); - if (!nft_is_active(net, ext)) + if (!nft_is_active_next(net, ext)) continue; kfree(elem->priv); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index f03f4d4d7d88967d237c5064cd729ea6f83b40bf..134e05d31061e4b6daa977cb47084f0da20f697c 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -508,7 +508,7 @@ __build_packet_message(struct nfnl_log_net *log, htonl(br_port_get_rcu(indev)->br->dev->ifindex))) goto nla_put_failure; } else { - struct net_device *physindev; + int physinif; /* Case 2: indev is bridge group, we need to look for * physical device (when called from ipv4) */ @@ -516,10 +516,10 @@ __build_packet_message(struct nfnl_log_net *log, htonl(indev->ifindex))) goto nla_put_failure; - physindev = nf_bridge_get_physindev(skb); - if (physindev && + physinif = nf_bridge_get_physinif(skb); + if (physinif && nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSINDEV, - htonl(physindev->ifindex))) + 
htonl(physinif))) goto nla_put_failure; } #endif diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c index 145dc62c62472605e9f99c6e69f240e4c66774e9..79039afde34ecb1ca9fe1494855676ab26e7c53b 100644 --- a/net/netfilter/nft_limit.c +++ b/net/netfilter/nft_limit.c @@ -58,6 +58,7 @@ static inline bool nft_limit_eval(struct nft_limit_priv *priv, u64 cost) static int nft_limit_init(struct nft_limit_priv *priv, const struct nlattr * const tb[], bool pkts) { + bool invert = false; u64 unit, tokens; if (tb[NFTA_LIMIT_RATE] == NULL || @@ -90,19 +91,23 @@ static int nft_limit_init(struct nft_limit_priv *priv, priv->rate); } + if (tb[NFTA_LIMIT_FLAGS]) { + u32 flags = ntohl(nla_get_be32(tb[NFTA_LIMIT_FLAGS])); + + if (flags & ~NFT_LIMIT_F_INV) + return -EOPNOTSUPP; + + if (flags & NFT_LIMIT_F_INV) + invert = true; + } + priv->limit = kmalloc(sizeof(*priv->limit), GFP_KERNEL_ACCOUNT); if (!priv->limit) return -ENOMEM; priv->limit->tokens = tokens; priv->tokens_max = priv->limit->tokens; - - if (tb[NFTA_LIMIT_FLAGS]) { - u32 flags = ntohl(nla_get_be32(tb[NFTA_LIMIT_FLAGS])); - - if (flags & NFT_LIMIT_F_INV) - priv->invert = true; - } + priv->invert = invert; priv->limit->last = ktime_get_ns(); spin_lock_init(&priv->limit->lock); diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index ec6ed6fda96c5903d6136fce62f82912dc0701cb..343e65f377d442a9fda9e3556b04cbb7fea20174 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -59,7 +59,7 @@ physdev_mt(const struct sk_buff *skb, struct xt_action_param *par) (!!outdev ^ !(info->invert & XT_PHYSDEV_OP_BRIDGED))) return false; - physdev = nf_bridge_get_physindev(skb); + physdev = nf_bridge_get_physindev(skb, xt_net(par)); indev = physdev ? 
physdev->name : NULL; if ((info->bitmask & XT_PHYSDEV_OP_ISIN && diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c index d63d2e5dc60c97e46ae977674e113b019169682b..dae378f1d52b607abf1cd0931473b28a390f1949 100644 --- a/net/nfc/digital_core.c +++ b/net/nfc/digital_core.c @@ -858,4 +858,5 @@ void nfc_digital_unregister_device(struct nfc_digital_dev *ddev) } EXPORT_SYMBOL(nfc_digital_unregister_device); +MODULE_DESCRIPTION("NFC Digital protocol stack"); MODULE_LICENSE("GPL"); diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 6c9592d051206f242b2249f3a5a57d4640333a2c..97348cedb16b30d9a60cb8096a8408f6a8890e6d 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -1577,4 +1577,5 @@ static void nci_cmd_work(struct work_struct *work) } } +MODULE_DESCRIPTION("NFC Controller Interface"); MODULE_LICENSE("GPL"); diff --git a/net/nfc/nci/spi.c b/net/nfc/nci/spi.c index b68150c971d0b108dd97411fa9362e09d47caede..6a93533c480e656914d83278068ebf10b4189b24 100644 --- a/net/nfc/nci/spi.c +++ b/net/nfc/nci/spi.c @@ -319,4 +319,5 @@ done: } EXPORT_SYMBOL_GPL(nci_spi_read); +MODULE_DESCRIPTION("NFC Controller Interface (NCI) SPI link layer"); MODULE_LICENSE("GPL"); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 2f8b39a614c319e0cf604a57ef2e6e17d89d5151..dbeb75c298573adc580568744d6781a5c6193b0d 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -1079,6 +1079,7 @@ void rxrpc_send_version_request(struct rxrpc_local *local, /* * local_object.c */ +void rxrpc_local_dont_fragment(const struct rxrpc_local *local, bool set); struct rxrpc_local *rxrpc_lookup_local(struct net *, const struct sockaddr_rxrpc *); struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *, enum rxrpc_local_trace); struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *, enum rxrpc_local_trace); diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index c553a30e9c8386384cc5f038c3ecb6570349c7b7..34d30736813531007f0a394f897f531f6db66eb4 
100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -36,6 +36,17 @@ static void rxrpc_encap_err_rcv(struct sock *sk, struct sk_buff *skb, int err, return ipv6_icmp_error(sk, skb, err, port, info, payload); } +/* + * Set or clear the Don't Fragment flag on a socket. + */ +void rxrpc_local_dont_fragment(const struct rxrpc_local *local, bool set) +{ + if (set) + ip_sock_set_mtu_discover(local->socket->sk, IP_PMTUDISC_DO); + else + ip_sock_set_mtu_discover(local->socket->sk, IP_PMTUDISC_DONT); +} + /* * Compare a local to an address. Return -ve, 0 or +ve to indicate less than, * same or greater than. @@ -203,7 +214,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) ip_sock_set_recverr(usk); /* we want to set the don't fragment bit */ - ip_sock_set_mtu_discover(usk, IP_PMTUDISC_DO); + rxrpc_local_dont_fragment(local, true); /* We want receive timestamps. */ sock_enable_timestamps(usk); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 5e53429c692288cef7a868fcbb61cbcc7ffe3c85..a0906145e8293ca457fd0b1493ba3892f5f0729a 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -494,14 +494,12 @@ send_fragmentable: switch (conn->local->srx.transport.family) { case AF_INET6: case AF_INET: - ip_sock_set_mtu_discover(conn->local->socket->sk, - IP_PMTUDISC_DONT); + rxrpc_local_dont_fragment(conn->local, false); rxrpc_inc_stat(call->rxnet, stat_tx_data_send_frag); ret = do_udp_sendmsg(conn->local->socket, &msg, len); conn->peer->last_tx_at = ktime_get_seconds(); - ip_sock_set_mtu_discover(conn->local->socket->sk, - IP_PMTUDISC_DO); + rxrpc_local_dont_fragment(conn->local, true); break; default: diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 1bf571a66e020d263ceb1d5a4489253b8fbf9728..b52dedcebce0a7aafe0888f97e79bb81435749f2 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -724,7 +724,9 @@ static int rxkad_send_response(struct rxrpc_connection *conn, serial = atomic_inc_return(&conn->serial); whdr.serial = 
htonl(serial); + rxrpc_local_dont_fragment(conn->local, false); ret = kernel_sendmsg(conn->local->socket, &msg, iov, 3, len); + rxrpc_local_dont_fragment(conn->local, true); if (ret < 0) { trace_rxrpc_tx_fail(conn->debug_id, serial, ret, rxrpc_tx_point_rxkad_response); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index e3236a3169c32f470ca9a6abfcb065d79d9860fe..92a12e3d0fe63646b1d82751c9986e08de6ab673 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1424,6 +1424,14 @@ static void tcf_block_owner_del(struct tcf_block *block, WARN_ON(1); } +static bool tcf_block_tracks_dev(struct tcf_block *block, + struct tcf_block_ext_info *ei) +{ + return tcf_block_shared(block) && + (ei->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS || + ei->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS); +} + int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q, struct tcf_block_ext_info *ei, struct netlink_ext_ack *extack) @@ -1462,7 +1470,7 @@ int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q, if (err) goto err_block_offload_bind; - if (tcf_block_shared(block)) { + if (tcf_block_tracks_dev(block, ei)) { err = xa_insert(&block->ports, dev->ifindex, dev, GFP_KERNEL); if (err) { NL_SET_ERR_MSG(extack, "block dev insert failed"); @@ -1516,7 +1524,7 @@ void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q, if (!block) return; - if (tcf_block_shared(block)) + if (tcf_block_tracks_dev(block, ei)) xa_erase(&block->ports, dev->ifindex); tcf_chain0_head_change_cb_del(block, ei); tcf_block_owner_del(block, q, ei->binder_type); diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 1af71fbb0d80590632439c436a81d218a4123bff..c7af0220f82f42d8e630c9ee182d4e3c4922fb87 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -2280,6 +2280,7 @@ static void __exit exit_rpcsec_gss(void) } MODULE_ALIAS("rpc-auth-6"); +MODULE_DESCRIPTION("Sun RPC Kerberos RPCSEC_GSS client 
authentication"); MODULE_LICENSE("GPL"); module_param_named(expired_cred_retry_delay, gss_expired_cred_retry_delay, diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index e31cfdf7eadcb9bfc6b3d3f3ad1e71db4f03b72e..64cff717c3d9b30c101bbd090df41bd161e20ee7 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -650,6 +650,7 @@ static void __exit cleanup_kerberos_module(void) gss_mech_unregister(&gss_kerberos_mech); } +MODULE_DESCRIPTION("Sun RPC Kerberos 5 module"); MODULE_LICENSE("GPL"); module_init(init_kerberos_module); module_exit(cleanup_kerberos_module); diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 691c0000e9eae14c0d0dfd08b2969fa663252ed0..bab6cab2940524a970422b62b3fa4212c61c4f43 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -148,6 +148,7 @@ cleanup_sunrpc(void) #endif rcu_barrier(); /* Wait for completion of call_rcu()'s */ } +MODULE_DESCRIPTION("Sun RPC core"); MODULE_LICENSE("GPL"); fs_initcall(init_sunrpc); /* Ensure we're initialised before nfs */ module_exit(cleanup_sunrpc); diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index e37b4d2e2acde25d6879770629b3996d03860c56..31e8a94dfc111b7705fe19b9b4ddee3e6a317a23 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1052,7 +1052,11 @@ alloc_encrypted: if (ret < 0) goto send_end; tls_ctx->pending_open_record_frags = true; - if (full_record || eor || sk_msg_full(msg_pl)) + + if (sk_msg_full(msg_pl)) + full_record = true; + + if (full_record || eor) goto copied; continue; } diff --git a/samples/cgroup/.gitignore b/samples/cgroup/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..3a0161194ccece5c22b753507c048d76bea698b0 --- /dev/null +++ b/samples/cgroup/.gitignore @@ -0,0 +1,3 @@ +/cgroup_event_listener +/memcg_event_listener + diff --git a/samples/ftrace/ftrace-direct-modify.c b/samples/ftrace/ftrace-direct-modify.c index 
e2a6a69352dfb775ebc7d6954c98943ec2a3097f..81220390851a396cb93cb66339130523d991eeaa 100644 --- a/samples/ftrace/ftrace-direct-modify.c +++ b/samples/ftrace/ftrace-direct-modify.c @@ -24,6 +24,41 @@ extern void my_tramp2(void *); static unsigned long my_ip = (unsigned long)schedule; +#ifdef CONFIG_RISCV +#include + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp1, @function\n" +" .globl my_tramp1\n" +" my_tramp1:\n" +" addi sp,sp,-2*"SZREG"\n" +" "REG_S" t0,0*"SZREG"(sp)\n" +" "REG_S" ra,1*"SZREG"(sp)\n" +" call my_direct_func1\n" +" "REG_L" t0,0*"SZREG"(sp)\n" +" "REG_L" ra,1*"SZREG"(sp)\n" +" addi sp,sp,2*"SZREG"\n" +" jr t0\n" +" .size my_tramp1, .-my_tramp1\n" +" .type my_tramp2, @function\n" +" .globl my_tramp2\n" + +" my_tramp2:\n" +" addi sp,sp,-2*"SZREG"\n" +" "REG_S" t0,0*"SZREG"(sp)\n" +" "REG_S" ra,1*"SZREG"(sp)\n" +" call my_direct_func2\n" +" "REG_L" t0,0*"SZREG"(sp)\n" +" "REG_L" ra,1*"SZREG"(sp)\n" +" addi sp,sp,2*"SZREG"\n" +" jr t0\n" +" .size my_tramp2, .-my_tramp2\n" +" .popsection\n" +); + +#endif /* CONFIG_RISCV */ + #ifdef CONFIG_X86_64 #include diff --git a/samples/ftrace/ftrace-direct-multi-modify.c b/samples/ftrace/ftrace-direct-multi-modify.c index 2e349834d63c386ef54a8d3fecb2df713e7e4f2e..f943e40d57fd32ed3fd1c57c9b197ac916ebe33d 100644 --- a/samples/ftrace/ftrace-direct-multi-modify.c +++ b/samples/ftrace/ftrace-direct-multi-modify.c @@ -22,6 +22,47 @@ void my_direct_func2(unsigned long ip) extern void my_tramp1(void *); extern void my_tramp2(void *); +#ifdef CONFIG_RISCV +#include + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp1, @function\n" +" .globl my_tramp1\n" +" my_tramp1:\n" +" addi sp,sp,-3*"SZREG"\n" +" "REG_S" a0,0*"SZREG"(sp)\n" +" "REG_S" t0,1*"SZREG"(sp)\n" +" "REG_S" ra,2*"SZREG"(sp)\n" +" mv a0,t0\n" +" call my_direct_func1\n" +" "REG_L" a0,0*"SZREG"(sp)\n" +" "REG_L" t0,1*"SZREG"(sp)\n" +" "REG_L" ra,2*"SZREG"(sp)\n" +" addi sp,sp,3*"SZREG"\n" +" jr t0\n" +" .size my_tramp1, 
.-my_tramp1\n" + +" .type my_tramp2, @function\n" +" .globl my_tramp2\n" +" my_tramp2:\n" +" addi sp,sp,-3*"SZREG"\n" +" "REG_S" a0,0*"SZREG"(sp)\n" +" "REG_S" t0,1*"SZREG"(sp)\n" +" "REG_S" ra,2*"SZREG"(sp)\n" +" mv a0,t0\n" +" call my_direct_func2\n" +" "REG_L" a0,0*"SZREG"(sp)\n" +" "REG_L" t0,1*"SZREG"(sp)\n" +" "REG_L" ra,2*"SZREG"(sp)\n" +" addi sp,sp,3*"SZREG"\n" +" jr t0\n" +" .size my_tramp2, .-my_tramp2\n" +" .popsection\n" +); + +#endif /* CONFIG_RISCV */ + #ifdef CONFIG_X86_64 #include diff --git a/samples/ftrace/ftrace-direct-multi.c b/samples/ftrace/ftrace-direct-multi.c index 9243dbfe4d0c1f72f7e9d55f5d7fb9631482018f..aed6df2927ce1833af8729810ea182ad5e492bfb 100644 --- a/samples/ftrace/ftrace-direct-multi.c +++ b/samples/ftrace/ftrace-direct-multi.c @@ -17,6 +17,31 @@ void my_direct_func(unsigned long ip) extern void my_tramp(void *); +#ifdef CONFIG_RISCV +#include + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp, @function\n" +" .globl my_tramp\n" +" my_tramp:\n" +" addi sp,sp,-3*"SZREG"\n" +" "REG_S" a0,0*"SZREG"(sp)\n" +" "REG_S" t0,1*"SZREG"(sp)\n" +" "REG_S" ra,2*"SZREG"(sp)\n" +" mv a0,t0\n" +" call my_direct_func\n" +" "REG_L" a0,0*"SZREG"(sp)\n" +" "REG_L" t0,1*"SZREG"(sp)\n" +" "REG_L" ra,2*"SZREG"(sp)\n" +" addi sp,sp,3*"SZREG"\n" +" jr t0\n" +" .size my_tramp, .-my_tramp\n" +" .popsection\n" +); + +#endif /* CONFIG_RISCV */ + #ifdef CONFIG_X86_64 #include diff --git a/samples/ftrace/ftrace-direct-too.c b/samples/ftrace/ftrace-direct-too.c index e39c3563ae4e42845aa8028aafa8fce394ab7759..6ff546a5d7eb05270683701b9693f3be2101b36d 100644 --- a/samples/ftrace/ftrace-direct-too.c +++ b/samples/ftrace/ftrace-direct-too.c @@ -19,6 +19,34 @@ void my_direct_func(struct vm_area_struct *vma, unsigned long address, extern void my_tramp(void *); +#ifdef CONFIG_RISCV +#include + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp, @function\n" +" .globl my_tramp\n" +" my_tramp:\n" +" addi sp,sp,-5*"SZREG"\n" +" 
"REG_S" a0,0*"SZREG"(sp)\n" +" "REG_S" a1,1*"SZREG"(sp)\n" +" "REG_S" a2,2*"SZREG"(sp)\n" +" "REG_S" t0,3*"SZREG"(sp)\n" +" "REG_S" ra,4*"SZREG"(sp)\n" +" call my_direct_func\n" +" "REG_L" a0,0*"SZREG"(sp)\n" +" "REG_L" a1,1*"SZREG"(sp)\n" +" "REG_L" a2,2*"SZREG"(sp)\n" +" "REG_L" t0,3*"SZREG"(sp)\n" +" "REG_L" ra,4*"SZREG"(sp)\n" +" addi sp,sp,5*"SZREG"\n" +" jr t0\n" +" .size my_tramp, .-my_tramp\n" +" .popsection\n" +); + +#endif /* CONFIG_RISCV */ + #ifdef CONFIG_X86_64 #include diff --git a/samples/ftrace/ftrace-direct.c b/samples/ftrace/ftrace-direct.c index 32c477da1e9aa3719cd1ac6997d8e774e7d84658..ef0945670e1eb985da1397e7e496cf4a768e49b7 100644 --- a/samples/ftrace/ftrace-direct.c +++ b/samples/ftrace/ftrace-direct.c @@ -16,6 +16,30 @@ void my_direct_func(struct task_struct *p) extern void my_tramp(void *); +#ifdef CONFIG_RISCV +#include + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp, @function\n" +" .globl my_tramp\n" +" my_tramp:\n" +" addi sp,sp,-3*"SZREG"\n" +" "REG_S" a0,0*"SZREG"(sp)\n" +" "REG_S" t0,1*"SZREG"(sp)\n" +" "REG_S" ra,2*"SZREG"(sp)\n" +" call my_direct_func\n" +" "REG_L" a0,0*"SZREG"(sp)\n" +" "REG_L" t0,1*"SZREG"(sp)\n" +" "REG_L" ra,2*"SZREG"(sp)\n" +" addi sp,sp,3*"SZREG"\n" +" jr t0\n" +" .size my_tramp, .-my_tramp\n" +" .popsection\n" +); + +#endif /* CONFIG_RISCV */ + #ifdef CONFIG_X86_64 #include diff --git a/samples/ftrace/sample-trace-array.c b/samples/ftrace/sample-trace-array.c index 6aba02a31c96c52d24804aad834b9a169c8c318f..d0ee9001c7b376cb4310fd1d30b5ba162fa292c5 100644 --- a/samples/ftrace/sample-trace-array.c +++ b/samples/ftrace/sample-trace-array.c @@ -105,7 +105,7 @@ static int __init sample_trace_array_init(void) * NOTE: This function increments the reference counter * associated with the trace array - "tr". 
*/ - tr = trace_array_get_by_name("sample-instance"); + tr = trace_array_get_by_name("sample-instance", "sched,timer,kprobes"); if (!tr) return -1; diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index 2fe6f2828d3769a88da2a8ffcae91a64442fc9fb..ec3cf7a960f8f1116698c37c09ed56f461bcb00c 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -80,15 +80,6 @@ KBUILD_CFLAGS += $(call cc-option,-Werror=designated-init) # Warn if there is an enum types mismatch KBUILD_CFLAGS += $(call cc-option,-Wenum-conversion) -# backward compatibility -KBUILD_EXTRA_WARN ?= $(KBUILD_ENABLE_EXTRA_GCC_CHECKS) - -ifeq ("$(origin W)", "command line") - KBUILD_EXTRA_WARN := $(W) -endif - -export KBUILD_EXTRA_WARN - # # W=1 - warnings which may be relevant and do not occur too often # @@ -106,7 +97,6 @@ KBUILD_CFLAGS += $(call cc-option, -Wunused-const-variable) KBUILD_CFLAGS += $(call cc-option, -Wpacked-not-aligned) KBUILD_CFLAGS += $(call cc-option, -Wformat-overflow) KBUILD_CFLAGS += $(call cc-option, -Wformat-truncation) -KBUILD_CFLAGS += $(call cc-option, -Wstringop-overflow) KBUILD_CFLAGS += $(call cc-option, -Wstringop-truncation) KBUILD_CPPFLAGS += -Wundef @@ -122,7 +112,6 @@ KBUILD_CFLAGS += $(call cc-disable-warning, restrict) KBUILD_CFLAGS += $(call cc-disable-warning, packed-not-aligned) KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow) KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation) -KBUILD_CFLAGS += $(call cc-disable-warning, stringop-overflow) KBUILD_CFLAGS += $(call cc-disable-warning, stringop-truncation) ifdef CONFIG_CC_IS_CLANG diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index c787edc9fe0953195c8aa03234693f273a18c6fb..da5aa5aed1e33ec5d0df5f294cba41a099b20370 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -83,8 +83,8 @@ dtb-$(CONFIG_OF_ALL_DTBS) += $(dtb-) multi-dtb-y := $(call multi-search, $(dtb-y), .dtb, -dtbs) # Primitive DTB compiled from *.dts real-dtb-y := $(call 
real-search, $(dtb-y), .dtb, -dtbs) -# Base DTB that overlay is applied onto (each first word of $(*-dtbs) expansion) -base-dtb-y := $(foreach m, $(multi-dtb-y), $(firstword $(call suffix-search, $m, .dtb, -dtbs))) +# Base DTB that overlay is applied onto +base-dtb-y := $(filter %.dtb, $(call real-search, $(multi-dtb-y), .dtb, -dtbs)) always-y += $(dtb-y) diff --git a/scripts/Makefile.package b/scripts/Makefile.package index 3addd1c0b989a0e9acd91a57184fd0342b804fda..a81dfb1f518106e50d8ebdb2014be1fcb028bba5 100644 --- a/scripts/Makefile.package +++ b/scripts/Makefile.package @@ -4,27 +4,6 @@ include $(srctree)/scripts/Kbuild.include include $(srctree)/scripts/Makefile.lib -KERNELPATH := kernel-$(subst -,_,$(KERNELRELEASE)) -# Include only those top-level files that are needed by make, plus the GPL copy -TAR_CONTENT := Documentation LICENSES arch block certs crypto drivers fs \ - include init io_uring ipc kernel lib mm net rust \ - samples scripts security sound tools usr virt \ - .config Makefile \ - Kbuild Kconfig COPYING $(wildcard localversion*) - -quiet_cmd_src_tar = TAR $(2).tar.gz - cmd_src_tar = \ -if test "$(objtree)" != "$(srctree)"; then \ - echo >&2; \ - echo >&2 " ERROR:"; \ - echo >&2 " Building source tarball is not possible outside the"; \ - echo >&2 " kernel source tree. 
Don't set KBUILD_OUTPUT"; \ - echo >&2; \ - false; \ -fi ; \ -tar -I $(KGZIP) -c $(RCS_TAR_IGNORE) -f $(2).tar.gz \ - --transform 's:^:$(2)/:S' $(TAR_CONTENT) $(3) - # Git # --------------------------------------------------------------------------- @@ -130,8 +109,6 @@ debian-orig: linux.tar$(debian-orig-suffix) debian cp $< ../$(orig-name); \ fi -KBUILD_PKG_ROOTCMD ?= 'fakeroot -u' - PHONY += deb-pkg srcdeb-pkg bindeb-pkg deb-pkg: private build-type := source,binary @@ -146,7 +123,7 @@ deb-pkg srcdeb-pkg bindeb-pkg: $(if $(findstring source, $(build-type)), \ --unsigned-source --compression=$(KDEB_SOURCE_COMPRESS)) \ $(if $(findstring binary, $(build-type)), \ - --rules-file='$(MAKE) -f debian/rules' --jobs=1 -r$(KBUILD_PKG_ROOTCMD) -a$$(cat debian/arch), \ + -R'$(MAKE) -f debian/rules' -j1 -a$$(cat debian/arch), \ --no-check-builddeps) \ $(DPKG_FLAGS)) @@ -157,9 +134,8 @@ snap-pkg: rm -rf $(objtree)/snap mkdir $(objtree)/snap $(MAKE) clean - $(call cmd,src_tar,$(KERNELPATH)) sed "s@KERNELRELEASE@$(KERNELRELEASE)@; \ - s@SRCTREE@$(shell realpath $(KERNELPATH).tar.gz)@" \ + s@SRCTREE@$(abs_srctree)@" \ $(srctree)/scripts/package/snapcraft.template > \ $(objtree)/snap/snapcraft.yaml cd $(objtree)/snap && \ diff --git a/scripts/check-uapi.sh b/scripts/check-uapi.sh new file mode 100755 index 0000000000000000000000000000000000000000..955581735cb3c371fc6ea7043bf8845911f837c3 --- /dev/null +++ b/scripts/check-uapi.sh @@ -0,0 +1,573 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only +# Script to check commits for UAPI backwards compatibility + +set -o errexit +set -o pipefail + +print_usage() { + name=$(basename "$0") + cat << EOF +$name - check for UAPI header stability across Git commits + +By default, the script will check to make sure the latest commit (or current +dirty changes) did not introduce ABI changes when compared to HEAD^1. You can +check against additional commit ranges with the -b and -p options. 
+ +The script will not check UAPI headers for architectures other than the one +defined in ARCH. + +Usage: $name [-b BASE_REF] [-p PAST_REF] [-j N] [-l ERROR_LOG] [-i] [-q] [-v] + +Options: + -b BASE_REF Base git reference to use for comparison. If unspecified or empty, + will use any dirty changes in tree to UAPI files. If there are no + dirty changes, HEAD will be used. + -p PAST_REF Compare BASE_REF to PAST_REF (e.g. -p v6.1). If unspecified or empty, + will use BASE_REF^1. Must be an ancestor of BASE_REF. Only headers + that exist on PAST_REF will be checked for compatibility. + -j JOBS Number of checks to run in parallel (default: number of CPU cores). + -l ERROR_LOG Write error log to file (default: no error log is generated). + -i Ignore ambiguous changes that may or may not break UAPI compatibility. + -q Quiet operation. + -v Verbose operation (print more information about each header being checked). + +Environmental args: + ABIDIFF Custom path to abidiff binary + CC C compiler (default is "gcc") + ARCH Target architecture for the UAPI check (default is host arch) + +Exit codes: + $SUCCESS) Success + $FAIL_ABI) ABI difference detected + $FAIL_PREREQ) Prerequisite not met +EOF +} + +readonly SUCCESS=0 +readonly FAIL_ABI=1 +readonly FAIL_PREREQ=2 + +# Print to stderr +eprintf() { + # shellcheck disable=SC2059 + printf "$@" >&2 +} + +# Expand an array with a specific character (similar to Python string.join()) +join() { + local IFS="$1" + shift + printf "%s" "$*" +} + +# Create abidiff suppressions +gen_suppressions() { + # Common enum variant names which we don't want to worry about + # being shifted when new variants are added. 
+ local -a enum_regex=( + ".*_AFTER_LAST$" + ".*_CNT$" + ".*_COUNT$" + ".*_END$" + ".*_LAST$" + ".*_MASK$" + ".*_MAX$" + ".*_MAX_BIT$" + ".*_MAX_BPF_ATTACH_TYPE$" + ".*_MAX_ID$" + ".*_MAX_SHIFT$" + ".*_NBITS$" + ".*_NETDEV_NUMHOOKS$" + ".*_NFT_META_IIFTYPE$" + ".*_NL80211_ATTR$" + ".*_NLDEV_NUM_OPS$" + ".*_NUM$" + ".*_NUM_ELEMS$" + ".*_NUM_IRQS$" + ".*_SIZE$" + ".*_TLSMAX$" + "^MAX_.*" + "^NUM_.*" + ) + + # Common padding field names which can be expanded into + # without worrying about users. + local -a padding_regex=( + ".*end$" + ".*pad$" + ".*pad[0-9]?$" + ".*pad_[0-9]?$" + ".*padding$" + ".*padding[0-9]?$" + ".*padding_[0-9]?$" + ".*res$" + ".*resv$" + ".*resv[0-9]?$" + ".*resv_[0-9]?$" + ".*reserved$" + ".*reserved[0-9]?$" + ".*reserved_[0-9]?$" + ".*rsvd[0-9]?$" + ".*unused$" + ) + + cat << EOF +[suppress_type] + type_kind = enum + changed_enumerators_regexp = $(join , "${enum_regex[@]}") +EOF + + for p in "${padding_regex[@]}"; do + cat << EOF +[suppress_type] + type_kind = struct + has_data_member_inserted_at = offset_of_first_data_member_regexp(${p}) +EOF + done + +if [ "$IGNORE_AMBIGUOUS_CHANGES" = "true" ]; then + cat << EOF +[suppress_type] + type_kind = struct + has_data_member_inserted_at = end + has_size_change = yes +EOF +fi +} + +# Check if git tree is dirty +tree_is_dirty() { + ! git diff --quiet +} + +# Get list of files installed in $ref +get_file_list() { + local -r ref="$1" + local -r tree="$(get_header_tree "$ref")" + + # Print all installed headers, filtering out ones that can't be compiled + find "$tree" -type f -name '*.h' -printf '%P\n' | grep -v -f "$INCOMPAT_LIST" +} + +# Add to the list of incompatible headers +add_to_incompat_list() { + local -r ref="$1" + + # Start with the usr/include/Makefile to get a list of the headers + # that don't compile using this method. + if [ ! 
-f usr/include/Makefile ]; then + eprintf "error - no usr/include/Makefile present at %s\n" "$ref" + eprintf "Note: usr/include/Makefile was added in the v5.3 kernel release\n" + exit "$FAIL_PREREQ" + fi + { + # shellcheck disable=SC2016 + printf 'all: ; @echo $(no-header-test)\n' + cat usr/include/Makefile + } | SRCARCH="$ARCH" make --always-make -f - | tr " " "\n" \ + | grep -v "asm-generic" >> "$INCOMPAT_LIST" + + # The makefile also skips all asm-generic files, but prints "asm-generic/%" + # which won't work for our grep match. Instead, print something grep will match. + printf "asm-generic/.*\.h\n" >> "$INCOMPAT_LIST" +} + +# Compile the simple test app +do_compile() { + local -r inc_dir="$1" + local -r header="$2" + local -r out="$3" + printf "int main(void) { return 0; }\n" | \ + "$CC" -c \ + -o "$out" \ + -x c \ + -O0 \ + -std=c90 \ + -fno-eliminate-unused-debug-types \ + -g \ + "-I${inc_dir}" \ + -include "$header" \ + - +} + +# Run make headers_install +run_make_headers_install() { + local -r ref="$1" + local -r install_dir="$(get_header_tree "$ref")" + make -j "$MAX_THREADS" ARCH="$ARCH" INSTALL_HDR_PATH="$install_dir" \ + headers_install > /dev/null +} + +# Install headers for both git refs +install_headers() { + local -r base_ref="$1" + local -r past_ref="$2" + + for ref in "$base_ref" "$past_ref"; do + printf "Installing user-facing UAPI headers from %s... 
" "${ref:-dirty tree}" + if [ -n "$ref" ]; then + git archive --format=tar --prefix="${ref}-archive/" "$ref" \ + | (cd "$TMP_DIR" && tar xf -) + ( + cd "${TMP_DIR}/${ref}-archive" + run_make_headers_install "$ref" + add_to_incompat_list "$ref" "$INCOMPAT_LIST" + ) + else + run_make_headers_install "$ref" + add_to_incompat_list "$ref" "$INCOMPAT_LIST" + fi + printf "OK\n" + done + sort -u -o "$INCOMPAT_LIST" "$INCOMPAT_LIST" + sed -i -e '/^$/d' "$INCOMPAT_LIST" +} + +# Print the path to the headers_install tree for a given ref +get_header_tree() { + local -r ref="$1" + printf "%s" "${TMP_DIR}/${ref}/usr" +} + +# Check file list for UAPI compatibility +check_uapi_files() { + local -r base_ref="$1" + local -r past_ref="$2" + local -r abi_error_log="$3" + + local passed=0; + local failed=0; + local -a threads=() + set -o errexit + + printf "Checking changes to UAPI headers between %s and %s...\n" "$past_ref" "${base_ref:-dirty tree}" + # Loop over all UAPI headers that were installed by $past_ref (if they only exist on $base_ref, + # there's no way they're broken and no way to compare anyway) + while read -r file; do + if [ "${#threads[@]}" -ge "$MAX_THREADS" ]; then + if wait "${threads[0]}"; then + passed=$((passed + 1)) + else + failed=$((failed + 1)) + fi + threads=("${threads[@]:1}") + fi + + check_individual_file "$base_ref" "$past_ref" "$file" & + threads+=("$!") + done < <(get_file_list "$past_ref") + + for t in "${threads[@]}"; do + if wait "$t"; then + passed=$((passed + 1)) + else + failed=$((failed + 1)) + fi + done + + if [ -n "$abi_error_log" ]; then + printf 'Generated by "%s %s" from git ref %s\n\n' \ + "$0" "$*" "$(git rev-parse HEAD)" > "$abi_error_log" + fi + + while read -r error_file; do + { + cat "$error_file" + printf "\n\n" + } | tee -a "${abi_error_log:-/dev/null}" >&2 + done < <(find "$TMP_DIR" -type f -name '*.error' | sort) + + total="$((passed + failed))" + if [ "$failed" -gt 0 ]; then + eprintf "error - %d/%d UAPI headers compatible with 
%s appear _not_ to be backwards compatible\n" \ + "$failed" "$total" "$ARCH" + if [ -n "$abi_error_log" ]; then + eprintf "Failure summary saved to %s\n" "$abi_error_log" + fi + else + printf "All %d UAPI headers compatible with %s appear to be backwards compatible\n" \ + "$total" "$ARCH" + fi + + return "$failed" +} + +# Check an individual file for UAPI compatibility +check_individual_file() { + local -r base_ref="$1" + local -r past_ref="$2" + local -r file="$3" + + local -r base_header="$(get_header_tree "$base_ref")/${file}" + local -r past_header="$(get_header_tree "$past_ref")/${file}" + + if [ ! -f "$base_header" ]; then + mkdir -p "$(dirname "$base_header")" + printf "==== UAPI header %s was removed between %s and %s ====" \ + "$file" "$past_ref" "$base_ref" \ + > "${base_header}.error" + return 1 + fi + + compare_abi "$file" "$base_header" "$past_header" "$base_ref" "$past_ref" +} + +# Perform the A/B compilation and compare output ABI +compare_abi() { + local -r file="$1" + local -r base_header="$2" + local -r past_header="$3" + local -r base_ref="$4" + local -r past_ref="$5" + local -r log="${TMP_DIR}/log/${file}.log" + local -r error_log="${TMP_DIR}/log/${file}.error" + + mkdir -p "$(dirname "$log")" + + if ! do_compile "$(get_header_tree "$base_ref")/include" "$base_header" "${base_header}.bin" 2> "$log"; then + { + warn_str=$(printf "==== Could not compile version of UAPI header %s at %s ====\n" \ + "$file" "$base_ref") + printf "%s\n" "$warn_str" + cat "$log" + printf -- "=%.0s" $(seq 0 ${#warn_str}) + } > "$error_log" + return 1 + fi + + if ! 
do_compile "$(get_header_tree "$past_ref")/include" "$past_header" "${past_header}.bin" 2> "$log"; then + { + warn_str=$(printf "==== Could not compile version of UAPI header %s at %s ====\n" \ + "$file" "$past_ref") + printf "%s\n" "$warn_str" + cat "$log" + printf -- "=%.0s" $(seq 0 ${#warn_str}) + } > "$error_log" + return 1 + fi + + local ret=0 + "$ABIDIFF" --non-reachable-types \ + --suppressions "$SUPPRESSIONS" \ + "${past_header}.bin" "${base_header}.bin" > "$log" || ret="$?" + if [ "$ret" -eq 0 ]; then + if [ "$VERBOSE" = "true" ]; then + printf "No ABI differences detected in %s from %s -> %s\n" \ + "$file" "$past_ref" "$base_ref" + fi + else + # Bits in abidiff's return code can be used to determine the type of error + if [ $((ret & 0x2)) -gt 0 ]; then + eprintf "error - abidiff did not run properly\n" + exit 1 + fi + + if [ "$IGNORE_AMBIGUOUS_CHANGES" = "true" ] && [ "$ret" -eq 4 ]; then + return 0 + fi + + # If the only changes were additions (not modifications to existing APIs), then + # there's no problem. Ignore these diffs. 
+ if grep "Unreachable types summary" "$log" | grep -q "0 removed" && + grep "Unreachable types summary" "$log" | grep -q "0 changed"; then + return 0 + fi + + { + warn_str=$(printf "==== ABI differences detected in %s from %s -> %s ====" \ + "$file" "$past_ref" "$base_ref") + printf "%s\n" "$warn_str" + sed -e '/summary:/d' -e '/changed type/d' -e '/^$/d' -e 's/^/ /g' "$log" + printf -- "=%.0s" $(seq 0 ${#warn_str}) + if cmp "$past_header" "$base_header" > /dev/null 2>&1; then + printf "\n%s did not change between %s and %s...\n" "$file" "$past_ref" "${base_ref:-dirty tree}" + printf "It's possible a change to one of the headers it includes caused this error:\n" + grep '^#include' "$base_header" + printf "\n" + fi + } > "$error_log" + + return 1 + fi +} + +# Check that a minimum software version number is satisfied +min_version_is_satisfied() { + local -r min_version="$1" + local -r version_installed="$2" + + printf "%s\n%s\n" "$min_version" "$version_installed" \ + | sort -Vc > /dev/null 2>&1 +} + +# Make sure we have the tools we need and the arguments make sense +check_deps() { + ABIDIFF="${ABIDIFF:-abidiff}" + CC="${CC:-gcc}" + ARCH="${ARCH:-$(uname -m)}" + if [ "$ARCH" = "x86_64" ]; then + ARCH="x86" + fi + + local -r abidiff_min_version="2.4" + local -r libdw_min_version_if_clang="0.171" + + if ! command -v "$ABIDIFF" > /dev/null 2>&1; then + eprintf "error - abidiff not found!\n" + eprintf "Please install abigail-tools version %s or greater\n" "$abidiff_min_version" + eprintf "See: https://sourceware.org/libabigail/manual/libabigail-overview.html\n" + return 1 + fi + + local -r abidiff_version="$("$ABIDIFF" --version | cut -d ' ' -f 2)" + if ! 
min_version_is_satisfied "$abidiff_min_version" "$abidiff_version"; then + eprintf "error - abidiff version too old: %s\n" "$abidiff_version" + eprintf "Please install abigail-tools version %s or greater\n" "$abidiff_min_version" + eprintf "See: https://sourceware.org/libabigail/manual/libabigail-overview.html\n" + return 1 + fi + + if ! command -v "$CC" > /dev/null 2>&1; then + eprintf 'error - %s not found\n' "$CC" + return 1 + fi + + if "$CC" --version | grep -q clang; then + local -r libdw_version="$(ldconfig -v 2>/dev/null | grep -v SKIPPED | grep -m 1 -o 'libdw-[0-9]\+.[0-9]\+' | cut -c 7-)" + if ! min_version_is_satisfied "$libdw_min_version_if_clang" "$libdw_version"; then + eprintf "error - libdw version too old for use with clang: %s\n" "$libdw_version" + eprintf "Please install libdw from elfutils version %s or greater\n" "$libdw_min_version_if_clang" + eprintf "See: https://sourceware.org/elfutils/\n" + return 1 + fi + fi + + if [ ! -d "arch/${ARCH}" ]; then + eprintf 'error - ARCH "%s" is not a subdirectory under arch/\n' "$ARCH" + eprintf "Please set ARCH to one of:\n%s\n" "$(find arch -maxdepth 1 -mindepth 1 -type d -printf '%f ' | fmt)" + return 1 + fi + + if ! git rev-parse --is-inside-work-tree > /dev/null 2>&1; then + eprintf "error - this script requires the kernel tree to be initialized with Git\n" + return 1 + fi + + if ! git rev-parse --verify "$past_ref" > /dev/null 2>&1; then + printf 'error - invalid git reference "%s"\n' "$past_ref" + return 1 + fi + + if [ -n "$base_ref" ]; then + if ! 
git merge-base --is-ancestor "$past_ref" "$base_ref" > /dev/null 2>&1; then + printf 'error - "%s" is not an ancestor of base ref "%s"\n' "$past_ref" "$base_ref" + return 1 + fi + if [ "$(git rev-parse "$base_ref")" = "$(git rev-parse "$past_ref")" ]; then + printf 'error - "%s" and "%s" are the same reference\n' "$past_ref" "$base_ref" + return 1 + fi + fi +} + +run() { + local base_ref="$1" + local past_ref="$2" + local abi_error_log="$3" + shift 3 + + if [ -z "$KERNEL_SRC" ]; then + KERNEL_SRC="$(realpath "$(dirname "$0")"/..)" + fi + + cd "$KERNEL_SRC" + + if [ -z "$base_ref" ] && ! tree_is_dirty; then + base_ref=HEAD + fi + + if [ -z "$past_ref" ]; then + if [ -n "$base_ref" ]; then + past_ref="${base_ref}^1" + else + past_ref=HEAD + fi + fi + + if ! check_deps; then + exit "$FAIL_PREREQ" + fi + + TMP_DIR=$(mktemp -d) + readonly TMP_DIR + trap 'rm -rf "$TMP_DIR"' EXIT + + readonly INCOMPAT_LIST="${TMP_DIR}/incompat_list.txt" + touch "$INCOMPAT_LIST" + + readonly SUPPRESSIONS="${TMP_DIR}/suppressions.txt" + gen_suppressions > "$SUPPRESSIONS" + + # Run make install_headers for both refs + install_headers "$base_ref" "$past_ref" + + # Check for any differences in the installed header trees + if diff -r -q "$(get_header_tree "$base_ref")" "$(get_header_tree "$past_ref")" > /dev/null 2>&1; then + printf "No changes to UAPI headers were applied between %s and %s\n" "$past_ref" "${base_ref:-dirty tree}" + exit "$SUCCESS" + fi + + if ! 
check_uapi_files "$base_ref" "$past_ref" "$abi_error_log"; then + exit "$FAIL_ABI" + fi +} + +main() { + MAX_THREADS=$(nproc) + VERBOSE="false" + IGNORE_AMBIGUOUS_CHANGES="false" + quiet="false" + local base_ref="" + while getopts "hb:p:j:l:iqv" opt; do + case $opt in + h) + print_usage + exit "$SUCCESS" + ;; + b) + base_ref="$OPTARG" + ;; + p) + past_ref="$OPTARG" + ;; + j) + MAX_THREADS="$OPTARG" + ;; + l) + abi_error_log="$OPTARG" + ;; + i) + IGNORE_AMBIGUOUS_CHANGES="true" + ;; + q) + quiet="true" + VERBOSE="false" + ;; + v) + VERBOSE="true" + quiet="false" + ;; + *) + exit "$FAIL_PREREQ" + esac + done + + if [ "$quiet" = "true" ]; then + exec > /dev/null 2>&1 + fi + + run "$base_ref" "$past_ref" "$abi_error_log" "$@" +} + +main "$@" diff --git a/scripts/coccinelle/api/device_attr_show.cocci b/scripts/coccinelle/api/device_attr_show.cocci index a28dc061653aa0ea75195f7777fb5fbc38ea38c2..550d1d2fc02a9b95f453594268f4bf44045808e9 100644 --- a/scripts/coccinelle/api/device_attr_show.cocci +++ b/scripts/coccinelle/api/device_attr_show.cocci @@ -1,10 +1,8 @@ // SPDX-License-Identifier: GPL-2.0-only /// /// From Documentation/filesystems/sysfs.rst: -/// show() must not use snprintf() when formatting the value to be -/// returned to user space. If you can guarantee that an overflow -/// will never happen you can use sprintf() otherwise you must use -/// scnprintf(). +/// show() should only use sysfs_emit() or sysfs_emit_at() when formatting +/// the value to be returned to user space. /// // Confidence: High // Copyright: (C) 2020 Denis Efremov ISPRAS @@ -30,15 +28,16 @@ ssize_t show(struct device *dev, struct device_attribute *attr, char *buf) @rp depends on patch@ identifier show, dev, attr, buf; +expression BUF, SZ, FORMAT, STR; @@ ssize_t show(struct device *dev, struct device_attribute *attr, char *buf) { <... 
return -- snprintf -+ scnprintf - (...); +- snprintf(BUF, SZ, FORMAT ++ sysfs_emit(BUF, FORMAT + ,...); ...> } @@ -46,10 +45,10 @@ ssize_t show(struct device *dev, struct device_attribute *attr, char *buf) p << r.p; @@ -coccilib.report.print_report(p[0], "WARNING: use scnprintf or sprintf") +coccilib.report.print_report(p[0], "WARNING: please use sysfs_emit or sysfs_emit_at") @script: python depends on org@ p << r.p; @@ -coccilib.org.print_todo(p[0], "WARNING: use scnprintf or sprintf") +coccilib.org.print_todo(p[0], "WARNING: please use sysfs_emit or sysfs_emit_at") diff --git a/scripts/gdb/linux/tasks.py b/scripts/gdb/linux/tasks.py index aa5ab6251f763b9860a6128e8582d1ef4af91f6a..6793d6e86e777b576e9acac680acdd8020c3d105 100644 --- a/scripts/gdb/linux/tasks.py +++ b/scripts/gdb/linux/tasks.py @@ -82,21 +82,12 @@ LxPs() thread_info_type = utils.CachedType("struct thread_info") -ia64_task_size = None - def get_thread_info(task): thread_info_ptr_type = thread_info_type.get_type().pointer() - if utils.is_target_arch("ia64"): - global ia64_task_size - if ia64_task_size is None: - ia64_task_size = gdb.parse_and_eval("sizeof(struct task_struct)") - thread_info_addr = task.address + ia64_task_size - thread_info = thread_info_addr.cast(thread_info_ptr_type) - else: - if task.type.fields()[0].type == thread_info_type.get_type(): - return task['thread_info'] - thread_info = task['stack'].cast(thread_info_ptr_type) + if task.type.fields()[0].type == thread_info_type.get_type(): + return task['thread_info'] + thread_info = task['stack'].cast(thread_info_ptr_type) return thread_info.dereference() diff --git a/scripts/generate_rust_target.rs b/scripts/generate_rust_target.rs index 3c6cbe2b278d302ebd6375e900dbe4875765805f..0da52b548ba50f5e1333c3c2b2a18da2533a18a1 100644 --- a/scripts/generate_rust_target.rs +++ b/scripts/generate_rust_target.rs @@ -161,6 +161,13 @@ fn main() { ts.push("features", features); ts.push("llvm-target", "x86_64-linux-gnu"); 
ts.push("target-pointer-width", "64"); + } else if cfg.has("LOONGARCH") { + ts.push("arch", "loongarch64"); + ts.push("data-layout", "e-m:e-p:64:64-i64:64-i128:128-n64-S128"); + ts.push("features", "-f,-d"); + ts.push("llvm-target", "loongarch64-linux-gnusf"); + ts.push("llvm-abiname", "lp64s"); + ts.push("target-pointer-width", "64"); } else { panic!("Unsupported architecture"); } diff --git a/scripts/genksyms/genksyms.c b/scripts/genksyms/genksyms.c index f5dfdb9d80e9d5bb66f7e5e0f5b5cb4aac069d9b..f3901c55df239df5a7d88e974bae8e388d2c00ae 100644 --- a/scripts/genksyms/genksyms.c +++ b/scripts/genksyms/genksyms.c @@ -16,9 +16,7 @@ #include #include #include -#ifdef __GNU_LIBRARY__ #include -#endif /* __GNU_LIBRARY__ */ #include "genksyms.h" /*----------------------------------------------------------------------*/ @@ -718,8 +716,6 @@ void error_with_pos(const char *fmt, ...) static void genksyms_usage(void) { fputs("Usage:\n" "genksyms [-adDTwqhVR] > /path/to/.tmp_obj.ver\n" "\n" -#ifdef __GNU_LIBRARY__ - " -s, --symbol-prefix Select symbol prefix\n" " -d, --debug Increment the debug level (repeatable)\n" " -D, --dump Dump expanded symbol defs (for debugging only)\n" " -r, --reference file Read reference symbols from a file\n" @@ -729,18 +725,6 @@ static void genksyms_usage(void) " -q, --quiet Disable warnings (default)\n" " -h, --help Print this message\n" " -V, --version Print the release version\n" -#else /* __GNU_LIBRARY__ */ - " -s Select symbol prefix\n" - " -d Increment the debug level (repeatable)\n" - " -D Dump expanded symbol defs (for debugging only)\n" - " -r file Read reference symbols from a file\n" - " -T file Dump expanded types into file\n" - " -p Preserve reference modversions or fail\n" - " -w Enable warnings\n" - " -q Disable warnings (default)\n" - " -h Print this message\n" - " -V Print the release version\n" -#endif /* __GNU_LIBRARY__ */ , stderr); } @@ -749,7 +733,6 @@ int main(int argc, char **argv) FILE *dumpfile = NULL, *ref_file = NULL; 
int o; -#ifdef __GNU_LIBRARY__ struct option long_opts[] = { {"debug", 0, 0, 'd'}, {"warnings", 0, 0, 'w'}, @@ -763,11 +746,8 @@ int main(int argc, char **argv) {0, 0, 0, 0} }; - while ((o = getopt_long(argc, argv, "s:dwqVDr:T:ph", + while ((o = getopt_long(argc, argv, "dwqVDr:T:ph", &long_opts[0], NULL)) != EOF) -#else /* __GNU_LIBRARY__ */ - while ((o = getopt(argc, argv, "s:dwqVDr:T:ph")) != EOF) -#endif /* __GNU_LIBRARY__ */ switch (o) { case 'd': flag_debug++; diff --git a/scripts/git.orderFile b/scripts/git.orderFile new file mode 100644 index 0000000000000000000000000000000000000000..5102ba73357f0029b6623034fc97b6cc550461b7 --- /dev/null +++ b/scripts/git.orderFile @@ -0,0 +1,42 @@ +# SPDX-License-Identifier: GPL-2.0 + +# order file for git, to produce patches which are easier to review +# by diffing the important stuff like header changes first. +# +# one-off usage: +# git diff -O scripts/git.orderFile ... +# +# add to git config: +# git config diff.orderFile scripts/git.orderFile +# + +MAINTAINERS + +# Documentation +Documentation/* +*.rst + +# git-specific +.gitignore +scripts/git.orderFile + +# build system +Kconfig* +*/Kconfig* +Kbuild* +*/Kbuild* +Makefile* +*/Makefile* +*.mak +*.mk +scripts/* + +# semantic patches +*.cocci + +# headers +*types.h +*.h + +# code +*.c diff --git a/scripts/head-object-list.txt b/scripts/head-object-list.txt index 26359968744ef1e9d5e40937e3ac4055d3a7bf2a..890f69005bab41c6d0977f2a3e95de5143d4fbba 100644 --- a/scripts/head-object-list.txt +++ b/scripts/head-object-list.txt @@ -17,7 +17,6 @@ arch/arm/kernel/head-nommu.o arch/arm/kernel/head.o arch/csky/kernel/head.o arch/hexagon/kernel/head.o -arch/ia64/kernel/head.o arch/loongarch/kernel/head.o arch/m68k/68000/head.o arch/m68k/coldfire/head.o diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile index 4eee155121a8b37c201e70c8327c3ea556d5cd94..ea1bf3b3dbde1bc463abbc8640537f873adc830f 100644 --- a/scripts/kconfig/Makefile +++ b/scripts/kconfig/Makefile @@ -27,6 
+27,14 @@ KCONFIG_DEFCONFIG_LIST += \ endif KCONFIG_DEFCONFIG_LIST += arch/$(SRCARCH)/configs/$(KBUILD_DEFCONFIG) +ifneq ($(findstring c, $(KBUILD_EXTRA_WARN)),) +export KCONFIG_WARN_UNKNOWN_SYMBOLS=1 +endif + +ifneq ($(findstring e, $(KBUILD_EXTRA_WARN)),) +export KCONFIG_WERROR=1 +endif + # We need this, in case the user has it in its environment unexport CONFIG_ @@ -99,7 +107,7 @@ config-fragments = $(call configfiles,$@) %.config: $(obj)/conf $(if $(config-fragments),, $(error $@ fragment does not exists on this architecture)) - $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh -m .config $(config-fragments) + $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh -m $(KCONFIG_CONFIG) $(config-fragments) $(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig PHONY += tinyconfig @@ -166,7 +174,7 @@ conf-objs := conf.o $(common-objs) # nconf: Used for the nconfig target based on ncurses hostprogs += nconf -nconf-objs := nconf.o nconf.gui.o $(common-objs) +nconf-objs := nconf.o nconf.gui.o mnconf-common.o $(common-objs) HOSTLDLIBS_nconf = $(call read-file, $(obj)/nconf-libs) HOSTCFLAGS_nconf.o = $(call read-file, $(obj)/nconf-cflags) @@ -179,7 +187,7 @@ $(obj)/nconf.o $(obj)/nconf.gui.o: | $(obj)/nconf-cflags hostprogs += mconf lxdialog := $(addprefix lxdialog/, \ checklist.o inputbox.o menubox.o textbox.o util.o yesno.o) -mconf-objs := mconf.o $(lxdialog) $(common-objs) +mconf-objs := mconf.o $(lxdialog) mnconf-common.o $(common-objs) HOSTLDLIBS_mconf = $(call read-file, $(obj)/mconf-libs) $(foreach f, mconf.o $(lxdialog), \ diff --git a/scripts/kconfig/conf.c b/scripts/kconfig/conf.c index 33d19e419908b8315603f04db41189ed9c506a0c..662a5e7c37c28539ce7104085478c718848c4fc3 100644 --- a/scripts/kconfig/conf.c +++ b/scripts/kconfig/conf.c @@ -827,6 +827,9 @@ int main(int ac, char **av) break; } + if (conf_errors()) + exit(1); + if (sync_kconfig) { name = getenv("KCONFIG_NOSILENTUPDATE"); if (name && *name) { @@ -890,6 +893,9 @@ int main(int ac, char 
**av) break; } + if (sym_dep_errors()) + exit(1); + if (input_mode == savedefconfig) { if (conf_write_defconfig(defconfig_file)) { fprintf(stderr, "n*** Error while saving defconfig to: %s\n\n", diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c index 4a6811d77d182964d1ec73c34a87c28670dec692..f53dcdd445976aa8759e13e6e3103ef02f76ead1 100644 --- a/scripts/kconfig/confdata.c +++ b/scripts/kconfig/confdata.c @@ -155,6 +155,13 @@ static void conf_message(const char *fmt, ...) static const char *conf_filename; static int conf_lineno, conf_warnings; +bool conf_errors(void) +{ + if (conf_warnings) + return getenv("KCONFIG_WERROR"); + return false; +} + static void conf_warning(const char *fmt, ...) { va_list ap; @@ -289,16 +296,12 @@ static int conf_set_sym_val(struct symbol *sym, int def, int def_flags, char *p) #define LINE_GROWTH 16 static int add_byte(int c, char **lineptr, size_t slen, size_t *n) { - char *nline; size_t new_size = slen + 1; + if (new_size > *n) { new_size += LINE_GROWTH - 1; new_size *= 2; - nline = xrealloc(*lineptr, new_size); - if (!nline) - return -1; - - *lineptr = nline; + *lineptr = xrealloc(*lineptr, new_size); *n = new_size; } @@ -341,19 +344,37 @@ e_out: return -1; } +/* like getline(), but the newline character is stripped away */ +static ssize_t getline_stripped(char **lineptr, size_t *n, FILE *stream) +{ + ssize_t len; + + len = compat_getline(lineptr, n, stream); + + if (len > 0 && (*lineptr)[len - 1] == '\n') { + len--; + (*lineptr)[len] = '\0'; + + if (len > 0 && (*lineptr)[len - 1] == '\r') { + len--; + (*lineptr)[len] = '\0'; + } + } + + return len; +} + int conf_read_simple(const char *name, int def) { FILE *in = NULL; char *line = NULL; size_t line_asize = 0; - char *p, *p2; + char *p, *val; struct symbol *sym; int i, def_flags; - const char *warn_unknown; - const char *werror; + const char *warn_unknown, *sym_name; warn_unknown = getenv("KCONFIG_WARN_UNKNOWN_SYMBOLS"); - werror = getenv("KCONFIG_WERROR"); if 
(name) { in = zconf_fopen(name); } else { @@ -417,8 +438,7 @@ load: case S_INT: case S_HEX: case S_STRING: - if (sym->def[def].val) - free(sym->def[def].val); + free(sym->def[def].val); /* fall through */ default: sym->def[def].val = NULL; @@ -426,90 +446,68 @@ load: } } - while (compat_getline(&line, &line_asize, in) != -1) { + while (getline_stripped(&line, &line_asize, in) != -1) { conf_lineno++; - sym = NULL; + + if (!line[0]) /* blank line */ + continue; + if (line[0] == '#') { - if (memcmp(line + 2, CONFIG_, strlen(CONFIG_))) + if (line[1] != ' ') + continue; + p = line + 2; + if (memcmp(p, CONFIG_, strlen(CONFIG_))) continue; - p = strchr(line + 2 + strlen(CONFIG_), ' '); + sym_name = p + strlen(CONFIG_); + p = strchr(sym_name, ' '); if (!p) continue; *p++ = 0; - if (strncmp(p, "is not set", 10)) + if (strcmp(p, "is not set")) continue; - if (def == S_DEF_USER) { - sym = sym_find(line + 2 + strlen(CONFIG_)); - if (!sym) { - if (warn_unknown) - conf_warning("unknown symbol: %s", - line + 2 + strlen(CONFIG_)); - - conf_set_changed(true); - continue; - } - } else { - sym = sym_lookup(line + 2 + strlen(CONFIG_), 0); - if (sym->type == S_UNKNOWN) - sym->type = S_BOOLEAN; - } - if (sym->flags & def_flags) { - conf_warning("override: reassigning to symbol %s", sym->name); - } - switch (sym->type) { - case S_BOOLEAN: - case S_TRISTATE: - sym->def[def].tri = no; - sym->flags |= def_flags; - break; - default: - ; - } - } else if (memcmp(line, CONFIG_, strlen(CONFIG_)) == 0) { - p = strchr(line + strlen(CONFIG_), '='); - if (!p) + + val = "n"; + } else { + if (memcmp(line, CONFIG_, strlen(CONFIG_))) { + conf_warning("unexpected data: %s", line); continue; - *p++ = 0; - p2 = strchr(p, '\n'); - if (p2) { - *p2-- = 0; - if (*p2 == '\r') - *p2 = 0; } - sym = sym_find(line + strlen(CONFIG_)); - if (!sym) { - if (def == S_DEF_AUTO) { - /* - * Reading from include/config/auto.conf - * If CONFIG_FOO previously existed in - * auto.conf but it is missing now, - * 
include/config/FOO must be touched. - */ - conf_touch_dep(line + strlen(CONFIG_)); - } else { - if (warn_unknown) - conf_warning("unknown symbol: %s", - line + strlen(CONFIG_)); - - conf_set_changed(true); - } + sym_name = line + strlen(CONFIG_); + p = strchr(sym_name, '='); + if (!p) { + conf_warning("unexpected data: %s", line); continue; } + *p = 0; + val = p + 1; + } - if (sym->flags & def_flags) { - conf_warning("override: reassigning to symbol %s", sym->name); - } - if (conf_set_sym_val(sym, def, def_flags, p)) - continue; - } else { - if (line[0] != '\r' && line[0] != '\n') - conf_warning("unexpected data: %.*s", - (int)strcspn(line, "\r\n"), line); + sym = sym_find(sym_name); + if (!sym) { + if (def == S_DEF_AUTO) { + /* + * Reading from include/config/auto.conf. + * If CONFIG_FOO previously existed in auto.conf + * but it is missing now, include/config/FOO + * must be touched. + */ + conf_touch_dep(sym_name); + } else { + if (warn_unknown) + conf_warning("unknown symbol: %s", sym_name); + conf_set_changed(true); + } continue; } + if (sym->flags & def_flags) + conf_warning("override: reassigning to symbol %s", sym->name); + + if (conf_set_sym_val(sym, def, def_flags, val)) + continue; + if (sym && sym_is_choice_value(sym)) { struct symbol *cs = prop_get_symbol(sym_get_choice_prop(sym)); switch (sym->def[def].tri) { @@ -533,9 +531,6 @@ load: free(line); fclose(in); - if (conf_warnings && werror) - exit(1); - return 0; } @@ -594,7 +589,7 @@ int conf_read(const char *name) /* Reset a string value if it's out of range */ if (sym_string_within_range(sym, sym->def[S_DEF_USER].val)) break; - sym->flags &= ~(SYMBOL_VALID|SYMBOL_DEF_USER); + sym->flags &= ~SYMBOL_VALID; conf_unsaved++; break; default: diff --git a/scripts/kconfig/expr.c b/scripts/kconfig/expr.c index 81ebf8108ca748893d469c77af1eebf8ecbf7df2..a290de36307ba8abe184a915fb0a6b6a3b29bbb6 100644 --- a/scripts/kconfig/expr.c +++ b/scripts/kconfig/expr.c @@ -1131,7 +1131,6 @@ static int 
expr_compare_type(enum expr_type t1, enum expr_type t2) default: return -1; } - printf("[%dgt%d?]", t1, t2); return 0; } diff --git a/scripts/kconfig/lkc.h b/scripts/kconfig/lkc.h index 471a59acecec61c3a8f7e141a7f6c6654aae21c6..5cdc8f5e6446ab55e42ce21dfbe728b0ab22aee7 100644 --- a/scripts/kconfig/lkc.h +++ b/scripts/kconfig/lkc.h @@ -99,8 +99,6 @@ bool menu_is_visible(struct menu *menu); bool menu_has_prompt(struct menu *menu); const char *menu_get_prompt(struct menu *menu); struct menu *menu_get_parent_menu(struct menu *menu); -bool menu_has_help(struct menu *menu); -const char *menu_get_help(struct menu *menu); int get_jump_key_char(void); struct gstr get_relations_str(struct symbol **sym_arr, struct list_head *head); void menu_get_ext_help(struct menu *menu, struct gstr *help); diff --git a/scripts/kconfig/lkc_proto.h b/scripts/kconfig/lkc_proto.h index edd1e617b25c5c3683ba4287f108ccb690608d10..a4ae5e9eadadb8758b5911d0f38c028b0622d937 100644 --- a/scripts/kconfig/lkc_proto.h +++ b/scripts/kconfig/lkc_proto.h @@ -1,4 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifndef LKC_PROTO_H +#define LKC_PROTO_H + #include /* confdata.c */ @@ -12,6 +15,7 @@ void conf_set_changed(bool val); bool conf_get_changed(void); void conf_set_changed_callback(void (*fn)(void)); void conf_set_message_callback(void (*fn)(const char *s)); +bool conf_errors(void); /* symbol.c */ extern struct symbol * symbol_hash[SYMBOL_HASHSIZE]; @@ -22,6 +26,7 @@ void print_symbol_for_listconfig(struct symbol *sym); struct symbol ** sym_re_search(const char *pattern); const char * sym_type_name(enum symbol_type type); void sym_calc_value(struct symbol *sym); +bool sym_dep_errors(void); enum symbol_type sym_get_type(struct symbol *sym); bool sym_tristate_within_range(struct symbol *sym,tristate tri); bool sym_set_tristate_value(struct symbol *sym,tristate tri); @@ -50,3 +55,5 @@ char *expand_one_token(const char **str); /* expr.c */ void expr_print(struct expr *e, void (*fn)(void *, struct symbol *, 
const char *), void *data, int prevtoken); + +#endif /* LKC_PROTO_H */ diff --git a/scripts/kconfig/mconf.c b/scripts/kconfig/mconf.c index eccc87a441e713a8b9013e3b286a176688ece372..5df32148a86951f78dca309db7d06d77f2364259 100644 --- a/scripts/kconfig/mconf.c +++ b/scripts/kconfig/mconf.c @@ -21,6 +21,7 @@ #include "lkc.h" #include "lxdialog/dialog.h" +#include "mnconf-common.h" static const char mconf_readme[] = "Overview\n" @@ -247,7 +248,7 @@ search_help[] = " -> PCI support (PCI [=y])\n" "(1) -> PCI access mode ( [=y])\n" " Defined at drivers/pci/Kconfig:47\n" - " Depends on: X86_LOCAL_APIC && X86_IO_APIC || IA64\n" + " Depends on: X86_LOCAL_APIC && X86_IO_APIC\n" " Selects: LIBCRC32\n" " Selected by: BAR [=n]\n" "-----------------------------------------------------------------\n" @@ -286,7 +287,6 @@ static int single_menu_mode; static int show_all_options; static int save_and_exit; static int silent; -static int jump_key_char; static void conf(struct menu *menu, struct menu *active_menu); @@ -378,58 +378,6 @@ static void show_help(struct menu *menu) str_free(&help); } -struct search_data { - struct list_head *head; - struct menu *target; -}; - -static int next_jump_key(int key) -{ - if (key < '1' || key > '9') - return '1'; - - key++; - - if (key > '9') - key = '1'; - - return key; -} - -static int handle_search_keys(int key, size_t start, size_t end, void *_data) -{ - struct search_data *data = _data; - struct jump_key *pos; - int index = 0; - - if (key < '1' || key > '9') - return 0; - - list_for_each_entry(pos, data->head, entries) { - index = next_jump_key(index); - - if (pos->offset < start) - continue; - - if (pos->offset >= end) - break; - - if (key == index) { - data->target = pos->target; - return 1; - } - } - - return 0; -} - -int get_jump_key_char(void) -{ - jump_key_char = next_jump_key(jump_key_char); - - return jump_key_char; -} - static void search_conf(void) { struct symbol **sym_arr; diff --git a/scripts/kconfig/menu.c 
b/scripts/kconfig/menu.c index 61c442d84aef4a0dd11a2512314fb2d5bea169d2..2cce8b651f6154197af3b5332a64ea3ecdac344d 100644 --- a/scripts/kconfig/menu.c +++ b/scripts/kconfig/menu.c @@ -673,19 +673,6 @@ struct menu *menu_get_parent_menu(struct menu *menu) return menu; } -bool menu_has_help(struct menu *menu) -{ - return menu->help != NULL; -} - -const char *menu_get_help(struct menu *menu) -{ - if (menu->help) - return menu->help; - else - return ""; -} - static void get_def_str(struct gstr *r, struct menu *menu) { str_printf(r, "Defined at %s:%d\n", @@ -856,10 +843,10 @@ void menu_get_ext_help(struct menu *menu, struct gstr *help) struct symbol *sym = menu->sym; const char *help_text = nohelp_text; - if (menu_has_help(menu)) { + if (menu->help) { if (sym->name) str_printf(help, "%s%s:\n\n", CONFIG_, sym->name); - help_text = menu_get_help(menu); + help_text = menu->help; } str_printf(help, "%s\n", help_text); if (sym) diff --git a/scripts/kconfig/mnconf-common.c b/scripts/kconfig/mnconf-common.c new file mode 100644 index 0000000000000000000000000000000000000000..18cb9a6c5aaad648c4b7f04c5457bd4442064226 --- /dev/null +++ b/scripts/kconfig/mnconf-common.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "expr.h" +#include "list.h" +#include "mnconf-common.h" + +int jump_key_char; + +int next_jump_key(int key) +{ + if (key < '1' || key > '9') + return '1'; + + key++; + + if (key > '9') + key = '1'; + + return key; +} + +int handle_search_keys(int key, size_t start, size_t end, void *_data) +{ + struct search_data *data = _data; + struct jump_key *pos; + int index = 0; + + if (key < '1' || key > '9') + return 0; + + list_for_each_entry(pos, data->head, entries) { + index = next_jump_key(index); + + if (pos->offset < start) + continue; + + if (pos->offset >= end) + break; + + if (key == index) { + data->target = pos->target; + return 1; + } + } + + return 0; +} + +int get_jump_key_char(void) +{ + jump_key_char = next_jump_key(jump_key_char); + + 
return jump_key_char; +} diff --git a/scripts/kconfig/mnconf-common.h b/scripts/kconfig/mnconf-common.h new file mode 100644 index 0000000000000000000000000000000000000000..ab6292cc4bf255425ea782611389dce90f2522fe --- /dev/null +++ b/scripts/kconfig/mnconf-common.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef MNCONF_COMMON_H +#define MNCONF_COMMON_H + +#include + +struct search_data { + struct list_head *head; + struct menu *target; +}; + +extern int jump_key_char; + +int next_jump_key(int key); +int handle_search_keys(int key, size_t start, size_t end, void *_data); +int get_jump_key_char(void); + +#endif /* MNCONF_COMMON_H */ diff --git a/scripts/kconfig/nconf.c b/scripts/kconfig/nconf.c index 143a2c351d5764b5e9e1b175ad16bb40d3b5d2aa..1148163cfa7e71c037ba28eab8183cf304fc5b85 100644 --- a/scripts/kconfig/nconf.c +++ b/scripts/kconfig/nconf.c @@ -12,6 +12,7 @@ #include #include "lkc.h" +#include "mnconf-common.h" #include "nconf.h" #include @@ -216,7 +217,7 @@ search_help[] = "Symbol: FOO [ = m]\n" "Prompt: Foo bus is used to drive the bar HW\n" "Defined at drivers/pci/Kconfig:47\n" -"Depends on: X86_LOCAL_APIC && X86_IO_APIC || IA64\n" +"Depends on: X86_LOCAL_APIC && X86_IO_APIC\n" "Location:\n" " -> Bus options (PCI, PCMCIA, EISA, ISA)\n" " -> PCI support (PCI [ = y])\n" @@ -279,7 +280,6 @@ static const char *current_instructions = menu_instructions; static char *dialog_input_result; static int dialog_input_result_len; -static int jump_key_char; static void selected_conf(struct menu *menu, struct menu *active_menu); static void conf(struct menu *menu); @@ -691,57 +691,6 @@ static int do_exit(void) return 0; } -struct search_data { - struct list_head *head; - struct menu *target; -}; - -static int next_jump_key(int key) -{ - if (key < '1' || key > '9') - return '1'; - - key++; - - if (key > '9') - key = '1'; - - return key; -} - -static int handle_search_keys(int key, size_t start, size_t end, void *_data) -{ - struct search_data *data = 
_data; - struct jump_key *pos; - int index = 0; - - if (key < '1' || key > '9') - return 0; - - list_for_each_entry(pos, data->head, entries) { - index = next_jump_key(index); - - if (pos->offset < start) - continue; - - if (pos->offset >= end) - break; - - if (key == index) { - data->target = pos->target; - return 1; - } - } - - return 0; -} - -int get_jump_key_char(void) -{ - jump_key_char = next_jump_key(jump_key_char); - - return jump_key_char; -} static void search_conf(void) { diff --git a/scripts/kconfig/symbol.c b/scripts/kconfig/symbol.c index a76925b46ce6309439ec0a554775dbbf2dd445cd..3e808528aaeab2625424b56247eed97fa107232d 100644 --- a/scripts/kconfig/symbol.c +++ b/scripts/kconfig/symbol.c @@ -29,14 +29,9 @@ struct symbol symbol_no = { .flags = SYMBOL_CONST|SYMBOL_VALID, }; -static struct symbol symbol_empty = { - .name = "", - .curr = { "", no }, - .flags = SYMBOL_VALID, -}; - struct symbol *modules_sym; static tristate modules_val; +static int sym_warnings; enum symbol_type sym_get_type(struct symbol *sym) { @@ -317,6 +312,14 @@ static void sym_warn_unmet_dep(struct symbol *sym) " Selected by [m]:\n"); fputs(str_get(&gs), stderr); + sym_warnings++; +} + +bool sym_dep_errors(void) +{ + if (sym_warnings) + return getenv("KCONFIG_WERROR"); + return false; } void sym_calc_value(struct symbol *sym) @@ -344,9 +347,13 @@ void sym_calc_value(struct symbol *sym) switch (sym->type) { case S_INT: + newval.val = "0"; + break; case S_HEX: + newval.val = "0x0"; + break; case S_STRING: - newval = symbol_empty.curr; + newval.val = ""; break; case S_BOOLEAN: case S_TRISTATE: @@ -697,13 +704,12 @@ const char *sym_get_string_default(struct symbol *sym) { struct property *prop; struct symbol *ds; - const char *str; + const char *str = ""; tristate val; sym_calc_visibility(sym); sym_calc_value(modules_sym); val = symbol_no.curr.tri; - str = symbol_empty.curr.val; /* If symbol has a default value look it up */ prop = sym_get_default_prop(sym); @@ -753,14 +759,17 @@ const 
char *sym_get_string_default(struct symbol *sym) case yes: return "y"; } case S_INT: + if (!str[0]) + str = "0"; + break; case S_HEX: - return str; - case S_STRING: - return str; - case S_UNKNOWN: + if (!str[0]) + str = "0x0"; + break; + default: break; } - return ""; + return str; } const char *sym_get_string_value(struct symbol *sym) diff --git a/scripts/kconfig/util.c b/scripts/kconfig/util.c index b78f114ad48cc5bac6e57246f8a2df4dabc0271d..92e5b2b9761d70966279ac0159769adf580110fa 100644 --- a/scripts/kconfig/util.c +++ b/scripts/kconfig/util.c @@ -42,8 +42,7 @@ struct gstr str_new(void) /* Free storage for growable string */ void str_free(struct gstr *gs) { - if (gs->s) - free(gs->s); + free(gs->s); gs->s = NULL; gs->len = 0; } diff --git a/scripts/min-tool-version.sh b/scripts/min-tool-version.sh index c62066825f538c4ffe048091f68d5892c0d8267d..9faa4d3d91e3586e20bed71a50893c6c959252ad 100755 --- a/scripts/min-tool-version.sh +++ b/scripts/min-tool-version.sh @@ -26,6 +26,8 @@ gcc) llvm) if [ "$SRCARCH" = s390 ]; then echo 15.0.0 + elif [ "$SRCARCH" = loongarch ]; then + echo 18.0.0 else echo 11.0.0 fi diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 250bf5993203c13b9452f4ab227d9155d1a7fb60..12f7c5d22930e2bb9da31b76c0be4d99f76d4e74 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -62,8 +62,7 @@ static unsigned int nr_unresolved; #define MODULE_NAME_LEN (64 - sizeof(Elf_Addr)) -void __attribute__((format(printf, 2, 3))) -modpost_log(enum loglevel loglevel, const char *fmt, ...) +void modpost_log(enum loglevel loglevel, const char *fmt, ...) { va_list arglist; @@ -93,6 +92,9 @@ modpost_log(enum loglevel loglevel, const char *fmt, ...) 
error_occurred = true; } +void __attribute__((alias("modpost_log"))) +modpost_log_noret(enum loglevel loglevel, const char *fmt, ...); + static inline bool strends(const char *str, const char *postfix) { if (strlen(str) < strlen(postfix)) @@ -476,11 +478,9 @@ static int parse_elf(struct elf_info *info, const char *filename) fatal("%s: not relocatable object.", filename); /* Check if file offset is correct */ - if (hdr->e_shoff > info->size) { + if (hdr->e_shoff > info->size) fatal("section header offset=%lu in file '%s' is bigger than filesize=%zu\n", (unsigned long)hdr->e_shoff, filename, info->size); - return 0; - } if (hdr->e_shnum == SHN_UNDEF) { /* @@ -518,12 +518,11 @@ static int parse_elf(struct elf_info *info, const char *filename) const char *secname; int nobits = sechdrs[i].sh_type == SHT_NOBITS; - if (!nobits && sechdrs[i].sh_offset > info->size) { + if (!nobits && sechdrs[i].sh_offset > info->size) fatal("%s is truncated. sechdrs[i].sh_offset=%lu > sizeof(*hrd)=%zu\n", filename, (unsigned long)sechdrs[i].sh_offset, sizeof(*hdr)); - return 0; - } + secname = secstrings + sechdrs[i].sh_name; if (strcmp(secname, ".modinfo") == 0) { if (nobits) @@ -1348,6 +1347,14 @@ static Elf_Addr addend_mips_rel(uint32_t *location, unsigned int r_type) #define R_LARCH_SUB32 55 #endif +#ifndef R_LARCH_RELAX +#define R_LARCH_RELAX 100 +#endif + +#ifndef R_LARCH_ALIGN +#define R_LARCH_ALIGN 102 +#endif + static void get_rel_type_and_sym(struct elf_info *elf, uint64_t r_info, unsigned int *r_type, unsigned int *r_sym) { @@ -1402,9 +1409,16 @@ static void section_rela(struct module *mod, struct elf_info *elf, continue; break; case EM_LOONGARCH: - if (!strcmp("__ex_table", fromsec) && - r_type == R_LARCH_SUB32) + switch (r_type) { + case R_LARCH_SUB32: + if (!strcmp("__ex_table", fromsec)) + continue; + break; + case R_LARCH_RELAX: + case R_LARCH_ALIGN: + /* These relocs do not refer to symbols */ continue; + } break; } @@ -1421,7 +1435,7 @@ static void section_rel(struct 
module *mod, struct elf_info *elf, for (rel = start; rel < stop; rel++) { Elf_Sym *tsym; - Elf_Addr taddr = 0, r_offset; + Elf_Addr taddr, r_offset; unsigned int r_type, r_sym; void *loc; diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h index 69baf014da4fdaa25989716f7686fadc43b8a603..835cababf1b09eb2353f8777f934dfabf3731454 100644 --- a/scripts/mod/modpost.h +++ b/scripts/mod/modpost.h @@ -197,7 +197,11 @@ enum loglevel { LOG_FATAL }; -void modpost_log(enum loglevel loglevel, const char *fmt, ...); +void __attribute__((format(printf, 2, 3))) +modpost_log(enum loglevel loglevel, const char *fmt, ...); + +void __attribute__((format(printf, 2, 3), noreturn)) +modpost_log_noret(enum loglevel loglevel, const char *fmt, ...); /* * warn - show the given message, then let modpost continue running, still @@ -214,4 +218,4 @@ void modpost_log(enum loglevel loglevel, const char *fmt, ...); */ #define warn(fmt, args...) modpost_log(LOG_WARN, fmt, ##args) #define error(fmt, args...) modpost_log(LOG_ERROR, fmt, ##args) -#define fatal(fmt, args...) modpost_log(LOG_FATAL, fmt, ##args) +#define fatal(fmt, args...) modpost_log_noret(LOG_FATAL, fmt, ##args) diff --git a/scripts/package/builddeb b/scripts/package/builddeb index d7dd0d04c70c9982bae9b86e54bf52dba295a2fd..bf96a3c2460814febe85a0a49fe2e9a8e90ea1ad 100755 --- a/scripts/package/builddeb +++ b/scripts/package/builddeb @@ -25,35 +25,20 @@ if_enabled_echo() { } create_package() { - local pname="$1" pdir="$2" - local dpkg_deb_opts - - mkdir -m 755 -p "$pdir/DEBIAN" - mkdir -p "$pdir/usr/share/doc/$pname" - cp debian/copyright "$pdir/usr/share/doc/$pname/" - cp debian/changelog "$pdir/usr/share/doc/$pname/changelog.Debian" - gzip -n -9 "$pdir/usr/share/doc/$pname/changelog.Debian" - sh -c "cd '$pdir'; find . -type f ! 
-path './DEBIAN/*' -printf '%P\0' \ - | xargs -r0 md5sum > DEBIAN/md5sums" - - # Fix ownership and permissions - if [ "$DEB_RULES_REQUIRES_ROOT" = "no" ]; then - dpkg_deb_opts="--root-owner-group" - else - chown -R root:root "$pdir" - fi - # a+rX in case we are in a restrictive umask environment like 0077 - # ug-s in case we build in a setuid/setgid directory - chmod -R go-w,a+rX,ug-s "$pdir" - - # Create the package - dpkg-gencontrol -p$pname -P"$pdir" - dpkg-deb $dpkg_deb_opts ${KDEB_COMPRESS:+-Z$KDEB_COMPRESS} --build "$pdir" .. + export DH_OPTIONS="-p${1}" + + dh_installdocs + dh_installchangelogs + dh_compress + dh_fixperms + dh_gencontrol + dh_md5sums + dh_builddeb -- ${KDEB_COMPRESS:+-Z$KDEB_COMPRESS} } install_linux_image () { - pdir=$1 - pname=$2 + pname=$1 + pdir=debian/$1 rm -rf ${pdir} @@ -62,7 +47,7 @@ install_linux_image () { ${MAKE} -f ${srctree}/Makefile INSTALL_DTBS_PATH="${pdir}/usr/lib/linux-image-${KERNELRELEASE}" dtbs_install fi - ${MAKE} -f ${srctree}/Makefile INSTALL_MOD_PATH="${pdir}" modules_install + ${MAKE} -f ${srctree}/Makefile INSTALL_MOD_PATH="${pdir}" INSTALL_MOD_STRIP=1 modules_install rm -f "${pdir}/lib/modules/${KERNELRELEASE}/build" # Install the kernel @@ -122,26 +107,22 @@ install_linux_image () { } install_linux_image_dbg () { - pdir=$1 - image_pdir=$2 + pdir=debian/$1 rm -rf ${pdir} - for module in $(find ${image_pdir}/lib/modules/ -name *.ko -printf '%P\n'); do - module=lib/modules/${module} - mkdir -p $(dirname ${pdir}/usr/lib/debug/${module}) - # only keep debug symbols in the debug file - ${OBJCOPY} --only-keep-debug ${image_pdir}/${module} ${pdir}/usr/lib/debug/${module} - # strip original module from debug symbols - ${OBJCOPY} --strip-debug ${image_pdir}/${module} - # then add a link to those - ${OBJCOPY} --add-gnu-debuglink=${pdir}/usr/lib/debug/${module} ${image_pdir}/${module} - done + # Parse modules.order directly because 'make modules_install' may sign, + # compress modules, and then run unneeded depmod. 
+ while read -r mod; do + mod="${mod%.o}.ko" + dbg="${pdir}/usr/lib/debug/lib/modules/${KERNELRELEASE}/kernel/${mod}" + buildid=$("${READELF}" -n "${mod}" | sed -n 's@^.*Build ID: \(..\)\(.*\)@\1/\2@p') + link="${pdir}/usr/lib/debug/.build-id/${buildid}.debug" - # re-sign stripped modules - if is_enabled CONFIG_MODULE_SIG_ALL; then - ${MAKE} -f ${srctree}/Makefile INSTALL_MOD_PATH="${image_pdir}" modules_sign - fi + mkdir -p "${dbg%/*}" "${link%/*}" + "${OBJCOPY}" --only-keep-debug "${mod}" "${dbg}" + ln -sf --relative "${dbg}" "${link}" + done < modules.order # Build debug package # Different tools want the image in different locations @@ -156,8 +137,8 @@ install_linux_image_dbg () { } install_kernel_headers () { - pdir=$1 - version=$2 + pdir=debian/$1 + version=${1#linux-headers-} rm -rf $pdir @@ -168,18 +149,16 @@ install_kernel_headers () { } install_libc_headers () { - pdir=$1 + pdir=debian/$1 rm -rf $pdir - $MAKE -f $srctree/Makefile headers $MAKE -f $srctree/Makefile headers_install INSTALL_HDR_PATH=$pdir/usr # move asm headers to /usr/include//asm to match the structure # used by Debian-based distros (to support multi-arch) - host_arch=$(dpkg-architecture -a$DEB_HOST_ARCH -qDEB_HOST_MULTIARCH) - mkdir $pdir/usr/include/$host_arch - mv $pdir/usr/include/asm $pdir/usr/include/$host_arch/ + mkdir "$pdir/usr/include/${DEB_HOST_MULTIARCH}" + mv "$pdir/usr/include/asm" "$pdir/usr/include/${DEB_HOST_MULTIARCH}" } rm -f debian/files @@ -190,30 +169,13 @@ for package in ${packages_enabled} do case ${package} in *-dbg) - # This must be done after linux-image, that is, we expect the - # debug package appears after linux-image in debian/control. 
- install_linux_image_dbg debian/linux-image-dbg debian/linux-image;; - linux-image-*|user-mode-linux-*) - install_linux_image debian/linux-image ${package};; - linux-libc-dev) - install_libc_headers debian/linux-libc-dev;; - linux-headers-*) - install_kernel_headers debian/linux-headers ${package#linux-headers-};; - esac -done - -for package in ${packages_enabled} -do - case ${package} in - *-dbg) - create_package ${package} debian/linux-image-dbg;; + install_linux_image_dbg "${package}";; linux-image-*|user-mode-linux-*) - create_package ${package} debian/linux-image;; + install_linux_image "${package}";; linux-libc-dev) - create_package ${package} debian/linux-libc-dev;; + install_libc_headers "${package}";; linux-headers-*) - create_package ${package} debian/linux-headers;; + install_kernel_headers "${package}";; esac + create_package "${package}" done - -exit 0 diff --git a/scripts/package/buildtar b/scripts/package/buildtar index 65b4ea50296219e2cfed406dddd3cb4eac0737ea..72c91a1b832f939d9861a3b21094c1df75a009fd 100755 --- a/scripts/package/buildtar +++ b/scripts/package/buildtar @@ -23,7 +23,6 @@ tmpdir=$1 # rm -rf -- "${tmpdir}" mkdir -p -- "${tmpdir}/boot" -dirs=boot # @@ -38,12 +37,9 @@ fi # -# Try to install modules +# Install modules # -if grep -q '^CONFIG_MODULES=y' include/config/auto.conf; then - make ARCH="${ARCH}" -f ${srctree}/Makefile INSTALL_MOD_PATH="${tmpdir}" modules_install - dirs="$dirs lib" -fi +make ARCH="${ARCH}" -f ${srctree}/Makefile INSTALL_MOD_PATH="${tmpdir}" modules_install # diff --git a/scripts/package/deb-build-option b/scripts/package/deb-build-option deleted file mode 100755 index 7950eff01781a306681985ce5a0a46743c411af5..0000000000000000000000000000000000000000 --- a/scripts/package/deb-build-option +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0-only - -# Set up CROSS_COMPILE if not defined yet -if [ "${CROSS_COMPILE+set}" != "set" -a "${DEB_HOST_ARCH}" != "${DEB_BUILD_ARCH}" ]; then - echo 
CROSS_COMPILE=${DEB_HOST_GNU_TYPE}- -fi - -version=$(dpkg-parsechangelog -S Version) -debian_revision="${version##*-}" - -if [ "${version}" != "${debian_revision}" ]; then - echo KBUILD_BUILD_VERSION=${debian_revision} -fi diff --git a/scripts/package/debian/copyright b/scripts/package/debian/copyright new file mode 100644 index 0000000000000000000000000000000000000000..4f1f06221f092453dc568cdb1f2fa598d3279a24 --- /dev/null +++ b/scripts/package/debian/copyright @@ -0,0 +1,16 @@ +This is a packaged upstream version of the Linux kernel. + +The sources may be found at most Linux archive sites, including: +https://www.kernel.org/pub/linux/kernel + +Copyright: 1991 - 2023 Linus Torvalds and others. + +The git repository for mainline kernel development is at: +git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 dated June, 1991. + +On Debian GNU/Linux systems, the complete text of the GNU General Public +License version 2 can be found in `/usr/share/common-licenses/GPL-2'. diff --git a/scripts/package/debian/rules b/scripts/package/debian/rules index 3dafa9496c6366d727bb8b3249886a11b93ba2d0..09830778006227f5854580bba5099e21b09b8d1e 100755 --- a/scripts/package/debian/rules +++ b/scripts/package/debian/rules @@ -1,33 +1,46 @@ #!/usr/bin/make -f # SPDX-License-Identifier: GPL-2.0-only -include debian/rules.vars +# in case debian/rules is executed directly +export DEB_RULES_REQUIRES_ROOT := no -srctree ?= . 
+include debian/rules.vars ifneq (,$(filter-out parallel=1,$(filter parallel=%,$(DEB_BUILD_OPTIONS)))) NUMJOBS = $(patsubst parallel=%,%,$(filter parallel=%,$(DEB_BUILD_OPTIONS))) MAKEFLAGS += -j$(NUMJOBS) endif +revision = $(lastword $(subst -, ,$(shell dpkg-parsechangelog -S Version))) +CROSS_COMPILE ?= $(filter-out $(DEB_BUILD_GNU_TYPE)-, $(DEB_HOST_GNU_TYPE)-) +make-opts = ARCH=$(ARCH) KERNELRELEASE=$(KERNELRELEASE) KBUILD_BUILD_VERSION=$(revision) $(addprefix CROSS_COMPILE=,$(CROSS_COMPILE)) + .PHONY: binary binary-indep binary-arch binary: binary-arch binary-indep binary-indep: build-indep binary-arch: build-arch - $(MAKE) -f $(srctree)/Makefile ARCH=$(ARCH) \ - KERNELRELEASE=$(KERNELRELEASE) \ - run-command KBUILD_RUN_COMMAND=+$(srctree)/scripts/package/builddeb + $(MAKE) $(make-opts) \ + run-command KBUILD_RUN_COMMAND='+$$(srctree)/scripts/package/builddeb' .PHONY: build build-indep build-arch build: build-arch build-indep build-indep: build-arch: - $(MAKE) -f $(srctree)/Makefile ARCH=$(ARCH) \ - KERNELRELEASE=$(KERNELRELEASE) \ - $(shell $(srctree)/scripts/package/deb-build-option) \ - olddefconfig all + $(MAKE) $(make-opts) olddefconfig + $(MAKE) $(make-opts) $(if $(filter um,$(ARCH)),,headers) all .PHONY: clean clean: - rm -rf debian/files debian/linux-* - $(MAKE) -f $(srctree)/Makefile ARCH=$(ARCH) clean + rm -rf debian/files debian/linux-* debian/deb-env.vars* + $(MAKE) ARCH=$(ARCH) clean + +# If DEB_HOST_ARCH is empty, it is likely that debian/rules was executed +# directly. Run 'dpkg-architecture --print-set --print-format=make' to +# generate a makefile construct that exports all DEB_* variables. 
+ifndef DEB_HOST_ARCH +include debian/deb-env.vars + +debian/deb-env.vars: + dpkg-architecture -a$$(cat debian/arch) --print-set --print-format=make > $@.tmp + mv $@.tmp $@ +endif diff --git a/scripts/package/install-extmod-build b/scripts/package/install-extmod-build index 8a7051fad0878990cd4569a326fc7137a7db088d..76e0765dfcd6ea23294be4d54329c1317dbddb37 100755 --- a/scripts/package/install-extmod-build +++ b/scripts/package/install-extmod-build @@ -20,7 +20,7 @@ mkdir -p "${destdir}" find "arch/${SRCARCH}" -maxdepth 1 -name 'Makefile*' find include scripts -type f -o -type l find "arch/${SRCARCH}" -name Kbuild.platforms -o -name Platform - find "arch/${SRCARCH}" -name include -o -name scripts -type d + find "arch/${SRCARCH}" -name include -type d ) | tar -c -f - -C "${srctree}" -T - | tar -xf - -C "${destdir}" { diff --git a/scripts/package/kernel.spec b/scripts/package/kernel.spec index 3eee0143e0c5cc7671e640aad2368446e94805e0..89298983a16941a20ccbd72330af1e168652c3f4 100644 --- a/scripts/package/kernel.spec +++ b/scripts/package/kernel.spec @@ -56,13 +56,7 @@ patch -p1 < %{SOURCE2} %install mkdir -p %{buildroot}/boot -%ifarch ia64 -mkdir -p %{buildroot}/boot/efi -cp $(%{make} %{makeflags} -s image_name) %{buildroot}/boot/efi/vmlinuz-%{KERNELRELEASE} -ln -s efi/vmlinuz-%{KERNELRELEASE} %{buildroot}/boot/ -%else cp $(%{make} %{makeflags} -s image_name) %{buildroot}/boot/vmlinuz-%{KERNELRELEASE} -%endif %{make} %{makeflags} INSTALL_MOD_PATH=%{buildroot} modules_install %{make} %{makeflags} INSTALL_HDR_PATH=%{buildroot}/usr headers_install cp System.map %{buildroot}/boot/System.map-%{KERNELRELEASE} diff --git a/scripts/package/mkdebian b/scripts/package/mkdebian index 5044224cf6714b3e5738f1e6d30dda05c589e3ff..070149c985fea4e33126650fad3e7769605c216a 100755 --- a/scripts/package/mkdebian +++ b/scripts/package/mkdebian @@ -26,7 +26,7 @@ set_debarch() { # Attempt to find the correct Debian architecture case "$UTS_MACHINE" in - i386|ia64|alpha|m68k|riscv*) + 
i386|alpha|m68k|riscv*) debarch="$UTS_MACHINE" ;; x86_64) debarch=amd64 ;; @@ -176,8 +176,6 @@ else fi echo $debarch > debian/arch -extra_build_depends=", $(if_enabled_echo CONFIG_UNWINDER_ORC libelf-dev:native)" -extra_build_depends="$extra_build_depends, $(if_enabled_echo CONFIG_SYSTEM_TRUSTED_KEYRING libssl-dev:native)" # Generate a simple changelog template cat < debian/changelog @@ -188,26 +186,6 @@ $sourcename ($packageversion) $distribution; urgency=low -- $maintainer $(date -R) EOF -# Generate copyright file -cat < debian/copyright -This is a packaged upstream version of the Linux kernel. - -The sources may be found at most Linux archive sites, including: -https://www.kernel.org/pub/linux/kernel - -Copyright: 1991 - 2018 Linus Torvalds and others. - -The git repository for mainline kernel development is at: -git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; version 2 dated June, 1991. - -On Debian GNU/Linux systems, the complete text of the GNU General Public -License version 2 can be found in \`/usr/share/common-licenses/GPL-2'. 
-EOF - # Generate a control file cat < debian/control Source: $sourcename @@ -215,7 +193,8 @@ Section: kernel Priority: optional Maintainer: $maintainer Rules-Requires-Root: no -Build-Depends: bc, debhelper, rsync, kmod, cpio, bison, flex $extra_build_depends +Build-Depends: debhelper-compat (= 12) +Build-Depends-Arch: bc, bison, cpio, flex, kmod, libelf-dev:native, libssl-dev:native, rsync Homepage: https://www.kernel.org/ Package: $packagename-$version @@ -268,6 +247,7 @@ ARCH := ${ARCH} KERNELRELEASE := ${KERNELRELEASE} EOF +cp "${srctree}/scripts/package/debian/copyright" debian/ cp "${srctree}/scripts/package/debian/rules" debian/ exit 0 diff --git a/scripts/package/snapcraft.template b/scripts/package/snapcraft.template index 626d278e4a5a7a9a90286444956b933e997895f7..85d5e07d1b40b2087ea49477fe90e96b924a122a 100644 --- a/scripts/package/snapcraft.template +++ b/scripts/package/snapcraft.template @@ -10,5 +10,5 @@ parts: kernel: plugin: kernel source: SRCTREE - source-type: tar + source-type: local kernel-with-firmware: false diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c index 40ae6b2c7a6da590f36d33caa543fd1376ba4945..3e4f54799cc0a5a366a222b66ef87d9abd9b5a36 100644 --- a/scripts/recordmcount.c +++ b/scripts/recordmcount.c @@ -590,7 +590,6 @@ static int do_file(char const *const fname) ideal_nop = ideal_nop4_arm64; is_fake_mcount64 = arm64_is_fake_mcount; break; - case EM_IA_64: reltype = R_IA64_IMM64; break; case EM_MIPS: /* reltype: e_class */ break; case EM_LOONGARCH: /* reltype: e_class */ break; case EM_PPC: reltype = R_PPC_ADDR32; break; diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index 6a4645a5797603c7a60ad95c4eaed5c25fbb49e2..f84df9e383fd0acf75b9afb87422aff9c088e3a8 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -275,13 +275,6 @@ if ($arch eq "x86_64") { $section_type = '%progbits'; $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_AARCH64_CALL26\\s+_mcount\$"; $type = ".quad"; -} elsif ($arch eq "ia64") 
{ - $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s_mcount\$"; - $type = "data8"; - - if ($is_module eq "0") { - $cc .= " -mconstant-gp"; - } } elsif ($arch eq "sparc64") { # In the objdump output there are giblets like: # 0000000000000000 : diff --git a/scripts/xz_wrap.sh b/scripts/xz_wrap.sh index 76e9cbcfbeab457bde700e733a5aae9f5ca1598f..d06baf626abe79d11401cb4a831b06e450c0b360 100755 --- a/scripts/xz_wrap.sh +++ b/scripts/xz_wrap.sh @@ -15,7 +15,6 @@ LZMA2OPTS= case $SRCARCH in x86) BCJ=--x86 ;; powerpc) BCJ=--powerpc ;; - ia64) BCJ=--ia64; LZMA2OPTS=pb=4 ;; arm) BCJ=--arm ;; sparc) BCJ=--sparc ;; esac diff --git a/security/apparmor/Kconfig b/security/apparmor/Kconfig index e0d1dd0a192a9d944d9e78d4fe09076500004682..64cc3044a42cedce62a745a9d19e0e2e9fab64e2 100644 --- a/security/apparmor/Kconfig +++ b/security/apparmor/Kconfig @@ -57,10 +57,10 @@ config SECURITY_APPARMOR_INTROSPECT_POLICY cpu is paramount. config SECURITY_APPARMOR_HASH - bool "Enable introspection of sha1 hashes for loaded profiles" + bool "Enable introspection of sha256 hashes for loaded profiles" depends on SECURITY_APPARMOR_INTROSPECT_POLICY select CRYPTO - select CRYPTO_SHA1 + select CRYPTO_SHA256 default y help This option selects whether introspection of loaded policy @@ -74,10 +74,10 @@ config SECURITY_APPARMOR_HASH_DEFAULT depends on SECURITY_APPARMOR_HASH default y help - This option selects whether sha1 hashing of loaded policy - is enabled by default. The generation of sha1 hashes for - loaded policy provide system administrators a quick way - to verify that policy in the kernel matches what is expected, + This option selects whether sha256 hashing of loaded policy + is enabled by default. The generation of sha256 hashes for + loaded policy provide system administrators a quick way to + verify that policy in the kernel matches what is expected, however it can slow down policy load on some devices. In these cases policy hashing can be disabled by default and enabled only if needed. 
diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index f3c77825aa7529ba3df5f8fa4933917e7c044a5a..bcfea073e3f2e386ada23becdd1e1be92f4878e1 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -1474,7 +1474,7 @@ int __aa_fs_create_rawdata(struct aa_ns *ns, struct aa_loaddata *rawdata) rawdata->dents[AAFS_LOADDATA_REVISION] = dent; if (aa_g_hash_policy) { - dent = aafs_create_file("sha1", S_IFREG | 0444, dir, + dent = aafs_create_file("sha256", S_IFREG | 0444, dir, rawdata, &seq_rawdata_hash_fops); if (IS_ERR(dent)) goto fail; @@ -1643,11 +1643,11 @@ static const char *rawdata_get_link_base(struct dentry *dentry, return target; } -static const char *rawdata_get_link_sha1(struct dentry *dentry, +static const char *rawdata_get_link_sha256(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { - return rawdata_get_link_base(dentry, inode, done, "sha1"); + return rawdata_get_link_base(dentry, inode, done, "sha256"); } static const char *rawdata_get_link_abi(struct dentry *dentry, @@ -1664,8 +1664,8 @@ static const char *rawdata_get_link_data(struct dentry *dentry, return rawdata_get_link_base(dentry, inode, done, "raw_data"); } -static const struct inode_operations rawdata_link_sha1_iops = { - .get_link = rawdata_get_link_sha1, +static const struct inode_operations rawdata_link_sha256_iops = { + .get_link = rawdata_get_link_sha256, }; static const struct inode_operations rawdata_link_abi_iops = { @@ -1738,7 +1738,7 @@ int __aafs_profile_mkdir(struct aa_profile *profile, struct dentry *parent) profile->dents[AAFS_PROF_ATTACH] = dent; if (profile->hash) { - dent = create_profile_file(dir, "sha1", profile, + dent = create_profile_file(dir, "sha256", profile, &seq_profile_hash_fops); if (IS_ERR(dent)) goto fail; @@ -1748,9 +1748,9 @@ int __aafs_profile_mkdir(struct aa_profile *profile, struct dentry *parent) #ifdef CONFIG_SECURITY_APPARMOR_EXPORT_BINARY if (profile->rawdata) { if (aa_g_hash_policy) { 
- dent = aafs_create("raw_sha1", S_IFLNK | 0444, dir, + dent = aafs_create("raw_sha256", S_IFLNK | 0444, dir, profile->label.proxy, NULL, NULL, - &rawdata_link_sha1_iops); + &rawdata_link_sha256_iops); if (IS_ERR(dent)) goto fail; aa_get_proxy(profile->label.proxy); diff --git a/security/apparmor/crypto.c b/security/apparmor/crypto.c index 6724e2ff6da8900127a19ea609fabf81764e12a0..aad486b2fca65482981ffbf47d11d4c448481c5e 100644 --- a/security/apparmor/crypto.c +++ b/security/apparmor/crypto.c @@ -106,16 +106,16 @@ static int __init init_profile_hash(void) if (!apparmor_initialized) return 0; - tfm = crypto_alloc_shash("sha1", 0, 0); + tfm = crypto_alloc_shash("sha256", 0, 0); if (IS_ERR(tfm)) { int error = PTR_ERR(tfm); - AA_ERROR("failed to setup profile sha1 hashing: %d\n", error); + AA_ERROR("failed to setup profile sha256 hashing: %d\n", error); return error; } apparmor_tfm = tfm; apparmor_hash_size = crypto_shash_digestsize(apparmor_tfm); - aa_info_message("AppArmor sha1 policy hashing enabled"); + aa_info_message("AppArmor sha256 policy hashing enabled"); return 0; } diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index 89fbeab4b33bd89041ec5af15aac390034249165..571158ec6188f92cfb5082e8df117aff0bc914f9 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -1311,7 +1311,7 @@ static int change_profile_perms_wrapper(const char *op, const char *name, return error; } -const char *stack_msg = "change_profile unprivileged unconfined converted to stacking"; +static const char *stack_msg = "change_profile unprivileged unconfined converted to stacking"; /** * aa_change_profile - perform a one-way profile transition diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c index 4c198d273f091d35ca7eb7caf657f9c650c2dcd5..cd569fbbfe36d29a741c9fe8eb449a82dfdf160f 100644 --- a/security/apparmor/lib.c +++ b/security/apparmor/lib.c @@ -41,6 +41,7 @@ void aa_free_str_table(struct aa_str_table *t) kfree_sensitive(t->table[i]); 
kfree_sensitive(t->table); t->table = NULL; + t->size = 0; } } diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index e490a70004089f102335e17d77c7d5005e90db03..98e1150bee9d0cbecb79c7e81cb05159f6160b04 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -469,8 +469,10 @@ static int apparmor_file_open(struct file *file) * Cache permissions granted by the previous exec check, with * implicit read and executable mmap which are required to * actually execute the image. + * + * Illogically, FMODE_EXEC is in f_flags, not f_mode. */ - if (current->in_execve) { + if (file->f_flags & __FMODE_EXEC) { fctx->allow = MAY_EXEC | MAY_READ | AA_EXEC_MMAP; return 0; } @@ -1023,7 +1025,6 @@ static int apparmor_task_kill(struct task_struct *target, struct kernel_siginfo cl = aa_get_newest_cred_label(cred); error = aa_may_signal(cred, cl, tc, tl, sig); aa_put_label(cl); - return error; } else { cl = __begin_current_label_crit_section(); error = aa_may_signal(current_cred(), cl, tc, tl, sig); @@ -1056,9 +1057,6 @@ static int apparmor_userns_create(const struct cred *cred) return error; } -/** - * apparmor_sk_alloc_security - allocate and attach the sk_security field - */ static int apparmor_sk_alloc_security(struct sock *sk, int family, gfp_t flags) { struct aa_sk_ctx *ctx; @@ -1072,9 +1070,6 @@ static int apparmor_sk_alloc_security(struct sock *sk, int family, gfp_t flags) return 0; } -/** - * apparmor_sk_free_security - free the sk_security field - */ static void apparmor_sk_free_security(struct sock *sk) { struct aa_sk_ctx *ctx = aa_sock(sk); @@ -1087,6 +1082,8 @@ static void apparmor_sk_free_security(struct sock *sk) /** * apparmor_sk_clone_security - clone the sk_security field + * @sk: sock to have security cloned + * @newsk: sock getting clone */ static void apparmor_sk_clone_security(const struct sock *sk, struct sock *newsk) @@ -1103,9 +1100,6 @@ static void apparmor_sk_clone_security(const struct sock *sk, new->peer = aa_get_label(ctx->peer); } 
-/** - * apparmor_socket_create - check perms before creating a new socket - */ static int apparmor_socket_create(int family, int type, int protocol, int kern) { struct aa_label *label; @@ -1127,10 +1121,14 @@ static int apparmor_socket_create(int family, int type, int protocol, int kern) /** * apparmor_socket_post_create - setup the per-socket security struct + * @sock: socket that is being setup + * @family: family of socket being created + * @type: type of the socket + * @protocol: protocol of the socket + * @kern: socket is a special kernel socket * * Note: - * - kernel sockets currently labeled unconfined but we may want to - * move to a special kernel label + * - kernel sockets labeled kernel_t used to use unconfined * - socket may not have sk here if created with sock_create_lite or * sock_alloc. These should be accept cases which will be handled in * sock_graft. @@ -1156,9 +1154,6 @@ static int apparmor_socket_post_create(struct socket *sock, int family, return 0; } -/** - * apparmor_socket_bind - check perms before bind addr to socket - */ static int apparmor_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen) { @@ -1172,9 +1167,6 @@ static int apparmor_socket_bind(struct socket *sock, aa_sk_perm(OP_BIND, AA_MAY_BIND, sock->sk)); } -/** - * apparmor_socket_connect - check perms before connecting @sock to @address - */ static int apparmor_socket_connect(struct socket *sock, struct sockaddr *address, int addrlen) { @@ -1188,9 +1180,6 @@ static int apparmor_socket_connect(struct socket *sock, aa_sk_perm(OP_CONNECT, AA_MAY_CONNECT, sock->sk)); } -/** - * apparmor_socket_listen - check perms before allowing listen - */ static int apparmor_socket_listen(struct socket *sock, int backlog) { AA_BUG(!sock); @@ -1202,9 +1191,7 @@ static int apparmor_socket_listen(struct socket *sock, int backlog) aa_sk_perm(OP_LISTEN, AA_MAY_LISTEN, sock->sk)); } -/** - * apparmor_socket_accept - check perms before accepting a new connection.
- * +/* * Note: while @newsock is created and has some information, the accept * has not been done. */ @@ -1233,18 +1220,12 @@ static int aa_sock_msg_perm(const char *op, u32 request, struct socket *sock, aa_sk_perm(op, request, sock->sk)); } -/** - * apparmor_socket_sendmsg - check perms before sending msg to another socket - */ static int apparmor_socket_sendmsg(struct socket *sock, struct msghdr *msg, int size) { return aa_sock_msg_perm(OP_SENDMSG, AA_MAY_SEND, sock, msg, size); } -/** - * apparmor_socket_recvmsg - check perms before receiving a message - */ static int apparmor_socket_recvmsg(struct socket *sock, struct msghdr *msg, int size, int flags) { @@ -1263,17 +1244,11 @@ static int aa_sock_perm(const char *op, u32 request, struct socket *sock) aa_sk_perm(op, request, sock->sk)); } -/** - * apparmor_socket_getsockname - check perms before getting the local address - */ static int apparmor_socket_getsockname(struct socket *sock) { return aa_sock_perm(OP_GETSOCKNAME, AA_MAY_GETATTR, sock); } -/** - * apparmor_socket_getpeername - check perms before getting remote address - */ static int apparmor_socket_getpeername(struct socket *sock) { return aa_sock_perm(OP_GETPEERNAME, AA_MAY_GETATTR, sock); @@ -1292,9 +1267,6 @@ static int aa_sock_opt_perm(const char *op, u32 request, struct socket *sock, aa_sk_perm(op, request, sock->sk)); } -/** - * apparmor_socket_getsockopt - check perms before getting socket options - */ static int apparmor_socket_getsockopt(struct socket *sock, int level, int optname) { @@ -1302,9 +1274,6 @@ static int apparmor_socket_getsockopt(struct socket *sock, int level, level, optname); } -/** - * apparmor_socket_setsockopt - check perms before setting socket options - */ static int apparmor_socket_setsockopt(struct socket *sock, int level, int optname) { @@ -1312,9 +1281,6 @@ static int apparmor_socket_setsockopt(struct socket *sock, int level, level, optname); } -/** - * apparmor_socket_shutdown - check perms before shutting down @sock 
conn - */ static int apparmor_socket_shutdown(struct socket *sock, int how) { return aa_sock_perm(OP_SHUTDOWN, AA_MAY_SHUTDOWN, sock); @@ -1323,6 +1289,8 @@ static int apparmor_socket_shutdown(struct socket *sock, int how) #ifdef CONFIG_NETWORK_SECMARK /** * apparmor_socket_sock_rcv_skb - check perms before associating skb to sk + * @sk: sk to associate @skb with + * @skb: skb to check for perms * * Note: can not sleep may be called with locks held * @@ -1354,6 +1322,11 @@ static struct aa_label *sk_peer_label(struct sock *sk) /** * apparmor_socket_getpeersec_stream - get security context of peer + * @sock: socket that we are trying to get the peer context of + * @optval: output - buffer to copy peer name to + * @optlen: output - size of copied name in @optval + * @len: size of @optval buffer + * Returns: 0 on success, -errno on failure * * Note: for tcp only valid if using ipsec or cipso on lan */ @@ -2182,7 +2155,7 @@ __initcall(apparmor_nf_ip_init); static char nulldfa_src[] = { #include "nulldfa.in" }; -struct aa_dfa *nulldfa; +static struct aa_dfa *nulldfa; static char stacksplitdfa_src[] = { #include "stacksplitdfa.in" diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index ed4c9803c8fad82adc9723e255dfdcda22cf9083..957654d253dd74cb50ac2a666a1588a03ac2e821 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -99,13 +99,14 @@ const char *const aa_profile_mode_names[] = { }; -static void aa_free_pdb(struct aa_policydb *policy) +static void aa_free_pdb(struct aa_policydb *pdb) { - if (policy) { - aa_put_dfa(policy->dfa); - if (policy->perms) - kvfree(policy->perms); - aa_free_str_table(&policy->trans); + if (pdb) { + aa_put_dfa(pdb->dfa); + if (pdb->perms) + kvfree(pdb->perms); + aa_free_str_table(&pdb->trans); + kfree(pdb); } } diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 47ec097d6741fe0fc6c33bfc0c844f331f704f17..5e578ef0ddffb1f7adb0cc9863bcbd18ec9a562b 100644 ---
a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -478,6 +478,8 @@ static bool unpack_trans_table(struct aa_ext *e, struct aa_str_table *strs) if (!table) goto fail; + strs->table = table; + strs->size = size; for (i = 0; i < size; i++) { char *str; int c, j, pos, size2 = aa_unpack_strdup(e, &str, NULL); @@ -520,14 +522,11 @@ static bool unpack_trans_table(struct aa_ext *e, struct aa_str_table *strs) goto fail; if (!aa_unpack_nameX(e, AA_STRUCTEND, NULL)) goto fail; - - strs->table = table; - strs->size = size; } return true; fail: - kfree_sensitive(table); + aa_free_str_table(strs); e->pos = saved_pos; return false; } @@ -833,6 +832,10 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) tmpname = aa_splitn_fqname(name, strlen(name), &tmpns, &ns_len); if (tmpns) { + if (!tmpname) { + info = "empty profile name"; + goto fail; + } *ns_name = kstrndup(tmpns, ns_len, GFP_KERNEL); if (!*ns_name) { info = "out of memory"; @@ -1022,8 +1025,10 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) } } else if (rules->policy->dfa && rules->policy->start[AA_CLASS_FILE]) { + aa_put_pdb(rules->file); rules->file = aa_get_pdb(rules->policy); } else { + aa_put_pdb(rules->file); rules->file = aa_get_pdb(nullpdb); } error = -EPROTO; diff --git a/security/apparmor/task.c b/security/apparmor/task.c index f29a2e80e6bf68cbc225ef3970fd12ca29526284..c87fb9f4ac18ae91d627fb8424267da0f791291e 100644 --- a/security/apparmor/task.c +++ b/security/apparmor/task.c @@ -278,7 +278,9 @@ static int profile_tracer_perm(const struct cred *cred, /** * aa_may_ptrace - test if tracer task can trace the tracee + * @tracer_cred: cred of task doing the tracing (NOT NULL) * @tracer: label of the task doing the tracing (NOT NULL) + * @tracee_cred: cred of task to be traced * @tracee: task label to be traced * @request: permission request * diff --git a/security/keys/encrypted-keys/encrypted.c 
b/security/keys/encrypted-keys/encrypted.c index 76f55dd13cb801078ba71079bf7e1c58eb2ada3b..8af2136069d239129c2994e5ee0f3e9b696ed7ea 100644 --- a/security/keys/encrypted-keys/encrypted.c +++ b/security/keys/encrypted-keys/encrypted.c @@ -237,10 +237,6 @@ static int datablob_parse(char *datablob, const char **format, break; } *decrypted_data = strsep(&datablob, " \t"); - if (!*decrypted_data) { - pr_info("encrypted_key: decrypted_data is missing\n"); - break; - } ret = 0; break; case Opt_load: diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c index 3c3af149bf1c12a94c318d188984ab4bda4a2edc..04a92c3d65d44de5502dd5955146e58cba4f4978 100644 --- a/security/tomoyo/tomoyo.c +++ b/security/tomoyo/tomoyo.c @@ -328,7 +328,8 @@ static int tomoyo_file_fcntl(struct file *file, unsigned int cmd, static int tomoyo_file_open(struct file *f) { /* Don't check read permission here if called from execve(). */ - if (current->in_execve) + /* Illogically, FMODE_EXEC is in f_flags, not f_mode. */ + if (f->f_flags & __FMODE_EXEC) return 0; return tomoyo_check_open_permission(tomoyo_domain(), &f->f_path, f->f_flags); diff --git a/sound/drivers/aloop.c b/sound/drivers/aloop.c index e87dc67f33c692567da42cffc97d0cd072818b5a..1c65e0a3b13ce875f7416d3f63912f609080ffc0 100644 --- a/sound/drivers/aloop.c +++ b/sound/drivers/aloop.c @@ -322,6 +322,17 @@ static int loopback_snd_timer_close_cable(struct loopback_pcm *dpcm) return 0; } +static bool is_access_interleaved(snd_pcm_access_t access) +{ + switch (access) { + case SNDRV_PCM_ACCESS_MMAP_INTERLEAVED: + case SNDRV_PCM_ACCESS_RW_INTERLEAVED: + return true; + default: + return false; + } +}; + static int loopback_check_format(struct loopback_cable *cable, int stream) { struct snd_pcm_runtime *runtime, *cruntime; @@ -341,7 +352,8 @@ static int loopback_check_format(struct loopback_cable *cable, int stream) check = runtime->format != cruntime->format || runtime->rate != cruntime->rate || runtime->channels != cruntime->channels || - 
runtime->access != cruntime->access; + is_access_interleaved(runtime->access) != + is_access_interleaved(cruntime->access); if (!check) return 0; if (stream == SNDRV_PCM_STREAM_CAPTURE) { @@ -369,7 +381,8 @@ static int loopback_check_format(struct loopback_cable *cable, int stream) &setup->channels_id); setup->channels = runtime->channels; } - if (setup->access != runtime->access) { + if (is_access_interleaved(setup->access) != + is_access_interleaved(runtime->access)) { snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_VALUE, &setup->access_id); setup->access = runtime->access; @@ -584,8 +597,7 @@ static void copy_play_buf(struct loopback_pcm *play, size = play->pcm_buffer_size - src_off; if (dst_off + size > capt->pcm_buffer_size) size = capt->pcm_buffer_size - dst_off; - if (runtime->access == SNDRV_PCM_ACCESS_RW_NONINTERLEAVED || - runtime->access == SNDRV_PCM_ACCESS_MMAP_NONINTERLEAVED) + if (!is_access_interleaved(runtime->access)) copy_play_buf_part_n(play, capt, size, src_off, dst_off); else memcpy(dst + dst_off, src + src_off, size); @@ -1544,8 +1556,7 @@ static int loopback_access_get(struct snd_kcontrol *kcontrol, mutex_lock(&loopback->cable_lock); access = loopback->setup[kcontrol->id.subdevice][kcontrol->id.device].access; - ucontrol->value.enumerated.item[0] = access == SNDRV_PCM_ACCESS_RW_NONINTERLEAVED || - access == SNDRV_PCM_ACCESS_MMAP_NONINTERLEAVED; + ucontrol->value.enumerated.item[0] = !is_access_interleaved(access); mutex_unlock(&loopback->cable_lock); return 0; diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index bf685d01259d30070aaf3ac7f3ed3204bc30c5bd..de2a3d08c73c1a7c49061bbe8a7fbb1a29664b8d 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -3946,7 +3946,6 @@ static int create_mute_led_cdev(struct hda_codec *codec, cdev->max_brightness = 1; cdev->default_trigger = micmute ? 
"audio-micmute" : "audio-mute"; cdev->brightness_set_blocking = callback; - cdev->brightness = ledtrig_audio_get(idx); cdev->flags = LED_CORE_SUSPENDRESUME; err = led_classdev_register(&codec->core.dev, cdev); diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 200779296a1b8b31239ae7ec4b643be88d24fa2e..495d63101186fd519523aab4d7fd8b25547143af 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -2301,6 +2301,7 @@ static int generic_hdmi_build_pcms(struct hda_codec *codec) codec_dbg(codec, "hdmi: pcm_num set to %d\n", pcm_num); for (idx = 0; idx < pcm_num; idx++) { + struct hdmi_spec_per_cvt *per_cvt; struct hda_pcm *info; struct hda_pcm_stream *pstr; @@ -2316,6 +2317,11 @@ static int generic_hdmi_build_pcms(struct hda_codec *codec) pstr = &info->stream[SNDRV_PCM_STREAM_PLAYBACK]; pstr->substreams = 1; pstr->ops = generic_ops; + + per_cvt = get_cvt(spec, 0); + pstr->channels_min = per_cvt->channels_min; + pstr->channels_max = per_cvt->channels_max; + /* pcm number is less than pcm_rec array size */ if (spec->pcm_used >= ARRAY_SIZE(spec->pcm_rec)) break; diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index b68c94757051057275953fe054fe5438be1f9c06..f6f16622f9cc78a1ac8ca0de8b82e915f580f7fd 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9861,6 +9861,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x87f5, "HP", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87f6, "HP Spectre x360 14", ALC245_FIXUP_HP_X360_AMP), SND_PCI_QUIRK(0x103c, 0x87f7, "HP Spectre x360 14", ALC245_FIXUP_HP_X360_AMP), + SND_PCI_QUIRK(0x103c, 0x87fe, "HP Laptop 15s-fq2xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x8805, "HP ProBook 650 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x880d, "HP EliteBook 830 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8811, "HP Spectre x360 15-eb1xxx", 
ALC285_FIXUP_HP_SPECTRE_X360_EB1), @@ -9955,6 +9956,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8c71, "HP EliteBook 845 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c72, "HP EliteBook 865 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c96, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x8c97, "HP ZBook", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8ca4, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ca7, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8cf5, "HP ZBook Studio 16", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED), @@ -10231,6 +10233,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x3176, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x17aa, 0x3178, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x17aa, 0x31af, "ThinkCentre Station", ALC623_FIXUP_LENOVO_THINKSTATION_P340), + SND_PCI_QUIRK(0x17aa, 0x334b, "Lenovo ThinkCentre M70 Gen5", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x17aa, 0x3801, "Lenovo Yoga9 14IAP7", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN), SND_PCI_QUIRK(0x17aa, 0x3802, "Lenovo Yoga DuetITL 2021", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3813, "Legion 7i 15IMHG05", ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS), diff --git a/sound/pci/oxygen/oxygen_mixer.c b/sound/pci/oxygen/oxygen_mixer.c index 46705ec77b4810ae0fd958749580937db8830ce3..eb3aca16359c58f6f5985ba44ef2173b3d5afc50 100644 --- a/sound/pci/oxygen/oxygen_mixer.c +++ b/sound/pci/oxygen/oxygen_mixer.c @@ -718,7 +718,7 @@ static int ac97_fp_rec_volume_put(struct snd_kcontrol *ctl, oldreg = oxygen_read_ac97(chip, 1, AC97_REC_GAIN); newreg = oldreg & ~0x0707; newreg = newreg | (value->value.integer.value[0] & 7); - newreg = newreg | ((value->value.integer.value[0] & 7) << 8); + newreg = newreg | 
((value->value.integer.value[1] & 7) << 8); change = newreg != oldreg; if (change) oxygen_write_ac97(chip, 1, AC97_REC_GAIN, newreg); diff --git a/sound/soc/codecs/rtq9128.c b/sound/soc/codecs/rtq9128.c index c22b047115cc47217d6455697014503d5a6aa4a1..aa3eadecd9746cd5f24427955b4aea0b49d88274 100644 --- a/sound/soc/codecs/rtq9128.c +++ b/sound/soc/codecs/rtq9128.c @@ -59,6 +59,7 @@ struct rtq9128_data { struct gpio_desc *enable; + unsigned int daifmt; int tdm_slots; int tdm_slot_width; bool tdm_input_data2_select; @@ -391,7 +392,11 @@ static int rtq9128_component_probe(struct snd_soc_component *comp) unsigned int val; int i, ret; - pm_runtime_resume_and_get(comp->dev); + ret = pm_runtime_resume_and_get(comp->dev); + if (ret < 0) { + dev_err(comp->dev, "Failed to resume device (%d)\n", ret); + return ret; + } val = snd_soc_component_read(comp, RTQ9128_REG_EFUSE_DATA); @@ -437,10 +442,7 @@ static const struct snd_soc_component_driver rtq9128_comp_driver = { static int rtq9128_dai_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) { struct rtq9128_data *data = snd_soc_dai_get_drvdata(dai); - struct snd_soc_component *comp = dai->component; struct device *dev = dai->dev; - unsigned int audfmt, fmtval; - int ret; dev_dbg(dev, "%s: fmt 0x%8x\n", __func__, fmt); @@ -450,35 +452,10 @@ static int rtq9128_dai_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) return -EINVAL; } - fmtval = fmt & SND_SOC_DAIFMT_FORMAT_MASK; - if (data->tdm_slots && fmtval != SND_SOC_DAIFMT_DSP_A && fmtval != SND_SOC_DAIFMT_DSP_B) { - dev_err(dev, "TDM is used, format only support DSP_A or DSP_B\n"); - return -EINVAL; - } + /* Store here and will be used in runtime hw_params for DAI format setting */ + data->daifmt = fmt; - switch (fmtval) { - case SND_SOC_DAIFMT_I2S: - audfmt = 8; - break; - case SND_SOC_DAIFMT_LEFT_J: - audfmt = 9; - break; - case SND_SOC_DAIFMT_RIGHT_J: - audfmt = 10; - break; - case SND_SOC_DAIFMT_DSP_A: - audfmt = data->tdm_slots ? 
12 : 11; - break; - case SND_SOC_DAIFMT_DSP_B: - audfmt = data->tdm_slots ? 4 : 3; - break; - default: - dev_err(dev, "Unsupported format 0x%8x\n", fmt); - return -EINVAL; - } - - ret = snd_soc_component_write_field(comp, RTQ9128_REG_I2S_OPT, RTQ9128_AUDFMT_MASK, audfmt); - return ret < 0 ? ret : 0; + return 0; } static int rtq9128_dai_set_tdm_slot(struct snd_soc_dai *dai, unsigned int tx_mask, @@ -554,10 +531,38 @@ static int rtq9128_dai_hw_params(struct snd_pcm_substream *stream, struct snd_pc unsigned int width, slot_width, bitrate, audbit, dolen; struct snd_soc_component *comp = dai->component; struct device *dev = dai->dev; + unsigned int fmtval, audfmt; int ret; dev_dbg(dev, "%s: width %d\n", __func__, params_width(param)); + fmtval = FIELD_GET(SND_SOC_DAIFMT_FORMAT_MASK, data->daifmt); + if (data->tdm_slots && fmtval != SND_SOC_DAIFMT_DSP_A && fmtval != SND_SOC_DAIFMT_DSP_B) { + dev_err(dev, "TDM is used, format only support DSP_A or DSP_B\n"); + return -EINVAL; + } + + switch (fmtval) { + case SND_SOC_DAIFMT_I2S: + audfmt = 8; + break; + case SND_SOC_DAIFMT_LEFT_J: + audfmt = 9; + break; + case SND_SOC_DAIFMT_RIGHT_J: + audfmt = 10; + break; + case SND_SOC_DAIFMT_DSP_A: + audfmt = data->tdm_slots ? 12 : 11; + break; + case SND_SOC_DAIFMT_DSP_B: + audfmt = data->tdm_slots ? 
4 : 3; + break; + default: + dev_err(dev, "Unsupported format 0x%8x\n", fmtval); + return -EINVAL; + } + switch (width = params_width(param)) { case 16: audbit = 0; @@ -611,6 +616,10 @@ static int rtq9128_dai_hw_params(struct snd_pcm_substream *stream, struct snd_pc return -EINVAL; } + ret = snd_soc_component_write_field(comp, RTQ9128_REG_I2S_OPT, RTQ9128_AUDFMT_MASK, audfmt); + if (ret < 0) + return ret; + ret = snd_soc_component_write_field(comp, RTQ9128_REG_I2S_OPT, RTQ9128_AUDBIT_MASK, audbit); if (ret < 0) return ret; diff --git a/sound/soc/codecs/tas2562.c b/sound/soc/codecs/tas2562.c index 962c2cdfa017441ce74d7084f14f2b765bf29b74..54561ae598b87ac3db985eea203b7b1508090804 100644 --- a/sound/soc/codecs/tas2562.c +++ b/sound/soc/codecs/tas2562.c @@ -59,7 +59,6 @@ struct tas2562_data { enum tas256x_model { TAS2562, - TAS2563, TAS2564, TAS2110, }; @@ -721,7 +720,6 @@ static int tas2562_parse_dt(struct tas2562_data *tas2562) static const struct i2c_device_id tas2562_id[] = { { "tas2562", TAS2562 }, - { "tas2563", TAS2563 }, { "tas2564", TAS2564 }, { "tas2110", TAS2110 }, { } @@ -770,7 +768,6 @@ static int tas2562_probe(struct i2c_client *client) #ifdef CONFIG_OF static const struct of_device_id tas2562_of_match[] = { { .compatible = "ti,tas2562", }, - { .compatible = "ti,tas2563", }, { .compatible = "ti,tas2564", }, { .compatible = "ti,tas2110", }, { }, diff --git a/sound/soc/codecs/tas2781-i2c.c b/sound/soc/codecs/tas2781-i2c.c index 917b1c15f71d41077243f9aef933af2f97c55c14..32913bd1a623381ee6e8d3d72c3f8e49d60ff0f7 100644 --- a/sound/soc/codecs/tas2781-i2c.c +++ b/sound/soc/codecs/tas2781-i2c.c @@ -1,13 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 // -// ALSA SoC Texas Instruments TAS2781 Audio Smart Amplifier +// ALSA SoC Texas Instruments TAS2563/TAS2781 Audio Smart Amplifier // // Copyright (C) 2022 - 2023 Texas Instruments Incorporated // https://www.ti.com // -// The TAS2781 driver implements a flexible and configurable +// The TAS2563/TAS2781 driver 
implements a flexible and configurable // algo coefficient setting for one, two, or even multiple -// TAS2781 chips. +// TAS2563/TAS2781 chips. // // Author: Shenghao Ding // Author: Kevin Lu @@ -32,6 +32,7 @@ #include static const struct i2c_device_id tasdevice_id[] = { + { "tas2563", TAS2563 }, { "tas2781", TAS2781 }, {} }; @@ -39,6 +40,7 @@ MODULE_DEVICE_TABLE(i2c, tasdevice_id); #ifdef CONFIG_OF static const struct of_device_id tasdevice_of_match[] = { + { .compatible = "ti,tas2563" }, { .compatible = "ti,tas2781" }, {}, }; diff --git a/sound/soc/generic/audio-graph-card2.c b/sound/soc/generic/audio-graph-card2.c index 9c94677f681a17b65095b9a691dcb13ea5903c0f..62606e20be9a3ec0e9607e4da1acd68e99f9ba1a 100644 --- a/sound/soc/generic/audio-graph-card2.c +++ b/sound/soc/generic/audio-graph-card2.c @@ -556,7 +556,7 @@ static int graph_parse_node_multi_nm(struct snd_soc_dai_link *dai_link, struct device_node *mcodec_port; int codec_idx; - if (*nm_idx >= nm_max) + if (*nm_idx > nm_max) break; mcpu_ep_n = of_get_next_child(mcpu_port, mcpu_ep_n); diff --git a/sound/soc/intel/boards/bxt_da7219_max98357a.c b/sound/soc/intel/boards/bxt_da7219_max98357a.c index 816fad8c1ff0ef439c8805dfb39edb6bdb3cab2f..540f7a29310a9f8f467af23540332db091a783b9 100644 --- a/sound/soc/intel/boards/bxt_da7219_max98357a.c +++ b/sound/soc/intel/boards/bxt_da7219_max98357a.c @@ -797,6 +797,9 @@ static int broxton_audio_probe(struct platform_device *pdev) broxton_audio_card.name = "glkda7219max"; /* Fixup the SSP entries for geminilake */ for (i = 0; i < ARRAY_SIZE(broxton_dais); i++) { + if (!broxton_dais[i].codecs->dai_name) + continue; + /* MAXIM_CODEC is connected to SSP1. 
*/ if (!strcmp(broxton_dais[i].codecs->dai_name, BXT_MAXIM_CODEC_DAI)) { @@ -822,6 +825,9 @@ static int broxton_audio_probe(struct platform_device *pdev) broxton_audio_card.name = "cmlda7219max"; for (i = 0; i < ARRAY_SIZE(broxton_dais); i++) { + if (!broxton_dais[i].codecs->dai_name) + continue; + /* MAXIM_CODEC is connected to SSP1. */ if (!strcmp(broxton_dais[i].codecs->dai_name, BXT_MAXIM_CODEC_DAI)) { diff --git a/sound/soc/intel/boards/bxt_rt298.c b/sound/soc/intel/boards/bxt_rt298.c index 4631106f2a2823d4dfebd7f4e7f372cf7d5e2732..c0eb65c14aa97b4b9d14163bf4e957dcaedbea69 100644 --- a/sound/soc/intel/boards/bxt_rt298.c +++ b/sound/soc/intel/boards/bxt_rt298.c @@ -604,7 +604,8 @@ static int broxton_audio_probe(struct platform_device *pdev) int i; for (i = 0; i < ARRAY_SIZE(broxton_rt298_dais); i++) { - if (!strncmp(card->dai_link[i].codecs->name, "i2c-INT343A:00", + if (card->dai_link[i].codecs->name && + !strncmp(card->dai_link[i].codecs->name, "i2c-INT343A:00", I2C_NAME_SIZE)) { if (!strncmp(card->name, "broxton-rt298", PLATFORM_NAME_SIZE)) { diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c index 9ecee43ad84d115ed3460855fa46c3a6efb577bc..300391fbc2fc2b29863d6ba19169d60e9a22502c 100644 --- a/sound/soc/intel/boards/sof_sdw.c +++ b/sound/soc/intel/boards/sof_sdw.c @@ -1256,11 +1256,11 @@ static int fill_sdw_codec_dlc(struct device *dev, else if (is_unique_device(adr_link, sdw_version, mfg_id, part_id, class_id, adr_index)) codec->name = devm_kasprintf(dev, GFP_KERNEL, - "sdw:%01x:%04x:%04x:%02x", link_id, + "sdw:0:%01x:%04x:%04x:%02x", link_id, mfg_id, part_id, class_id); else codec->name = devm_kasprintf(dev, GFP_KERNEL, - "sdw:%01x:%04x:%04x:%02x:%01x", link_id, + "sdw:0:%01x:%04x:%04x:%02x:%01x", link_id, mfg_id, part_id, class_id, unique_id); if (!codec->name) diff --git a/sound/soc/mediatek/common/mtk-dsp-sof-common.c b/sound/soc/mediatek/common/mtk-dsp-sof-common.c index 
f3894010f6563a0f949ec2b52307546eef648a81..7ec8965a70c06ba1b48ece52b5099832544e842a 100644 --- a/sound/soc/mediatek/common/mtk-dsp-sof-common.c +++ b/sound/soc/mediatek/common/mtk-dsp-sof-common.c @@ -24,7 +24,7 @@ int mtk_sof_dai_link_fixup(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai_link *sof_dai_link = NULL; const struct sof_conn_stream *conn = &sof_priv->conn_streams[i]; - if (strcmp(rtd->dai_link->name, conn->normal_link)) + if (conn->normal_link && strcmp(rtd->dai_link->name, conn->normal_link)) continue; for_each_card_rtds(card, runtime) { diff --git a/sound/soc/mediatek/mt8192/mt8192-mt6359-rt1015-rt5682.c b/sound/soc/mediatek/mt8192/mt8192-mt6359-rt1015-rt5682.c index 5bd6addd145051bbdc4f71583a3fa64e4581838d..bfcb2c486c39df1de6d095a4fbcca7087ed851e3 100644 --- a/sound/soc/mediatek/mt8192/mt8192-mt6359-rt1015-rt5682.c +++ b/sound/soc/mediatek/mt8192/mt8192-mt6359-rt1015-rt5682.c @@ -1208,7 +1208,8 @@ static int mt8192_mt6359_dev_probe(struct platform_device *pdev) dai_link->ignore = 0; } - if (strcmp(dai_link->codecs[0].dai_name, RT1015_CODEC_DAI) == 0) + if (dai_link->num_codecs && dai_link->codecs[0].dai_name && + strcmp(dai_link->codecs[0].dai_name, RT1015_CODEC_DAI) == 0) dai_link->ops = &mt8192_rt1015_i2s_ops; if (!dai_link->platforms->name) diff --git a/sound/soc/mediatek/mt8195/mt8195-afe-pcm.c b/sound/soc/mediatek/mt8195/mt8195-afe-pcm.c index 1e33863c85ca060a961ba33e100cfb0bcd0f8103..620d7ade1992e371aef1ba87e87ee9b5aeea7d24 100644 --- a/sound/soc/mediatek/mt8195/mt8195-afe-pcm.c +++ b/sound/soc/mediatek/mt8195/mt8195-afe-pcm.c @@ -1795,10 +1795,6 @@ static const struct snd_kcontrol_new mt8195_memif_controls[] = { MT8195_AFE_IRQ_28), }; -static const struct snd_soc_component_driver mt8195_afe_pcm_dai_component = { - .name = "mt8195-afe-pcm-dai", -}; - static const struct mtk_base_memif_data memif_data[MT8195_AFE_MEMIF_NUM] = { [MT8195_AFE_MEMIF_DL2] = { .name = "DL2", @@ -3037,7 +3033,6 @@ static int mt8195_afe_pcm_dev_probe(struct 
platform_device *pdev) struct device *dev = &pdev->dev; struct reset_control *rstc; int i, irq_id, ret; - struct snd_soc_component *component; ret = of_reserved_mem_device_init(dev); if (ret) @@ -3170,36 +3165,12 @@ static int mt8195_afe_pcm_dev_probe(struct platform_device *pdev) /* register component */ ret = devm_snd_soc_register_component(dev, &mt8195_afe_component, - NULL, 0); + afe->dai_drivers, afe->num_dai_drivers); if (ret) { dev_warn(dev, "err_platform\n"); goto err_pm_put; } - component = devm_kzalloc(dev, sizeof(*component), GFP_KERNEL); - if (!component) { - ret = -ENOMEM; - goto err_pm_put; - } - - ret = snd_soc_component_initialize(component, - &mt8195_afe_pcm_dai_component, - dev); - if (ret) - goto err_pm_put; - -#ifdef CONFIG_DEBUG_FS - component->debugfs_prefix = "pcm"; -#endif - - ret = snd_soc_add_component(component, - afe->dai_drivers, - afe->num_dai_drivers); - if (ret) { - dev_warn(dev, "err_dai_component\n"); - goto err_pm_put; - } - ret = regmap_multi_reg_write(afe->regmap, mt8195_afe_reg_defaults, ARRAY_SIZE(mt8195_afe_reg_defaults)); if (ret) @@ -3224,8 +3195,6 @@ err_pm_put: static void mt8195_afe_pcm_dev_remove(struct platform_device *pdev) { - snd_soc_unregister_component(&pdev->dev); - pm_runtime_disable(&pdev->dev); if (!pm_runtime_status_suspended(&pdev->dev)) mt8195_afe_runtime_suspend(&pdev->dev); diff --git a/sound/soc/mediatek/mt8195/mt8195-mt6359.c b/sound/soc/mediatek/mt8195/mt8195-mt6359.c index 4feb9fb7696792c9533e5007ca8ee9b52fe145ad..53fd8a897b9d27a5894006eaa0e96743b2e728c6 100644 --- a/sound/soc/mediatek/mt8195/mt8195-mt6359.c +++ b/sound/soc/mediatek/mt8195/mt8195-mt6359.c @@ -934,12 +934,11 @@ SND_SOC_DAILINK_DEFS(ETDM1_IN_BE, SND_SOC_DAILINK_DEFS(ETDM2_IN_BE, DAILINK_COMP_ARRAY(COMP_CPU("ETDM2_IN")), - DAILINK_COMP_ARRAY(COMP_DUMMY()), + DAILINK_COMP_ARRAY(COMP_EMPTY()), DAILINK_COMP_ARRAY(COMP_EMPTY())); SND_SOC_DAILINK_DEFS(ETDM1_OUT_BE, DAILINK_COMP_ARRAY(COMP_CPU("ETDM1_OUT")), - DAILINK_COMP_ARRAY(COMP_DUMMY()), 
DAILINK_COMP_ARRAY(COMP_EMPTY())); SND_SOC_DAILINK_DEFS(ETDM2_OUT_BE, @@ -1237,8 +1236,6 @@ static struct snd_soc_dai_link mt8195_mt6359_dai_links[] = { SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBS_CFS, .dpcm_capture = 1, - .init = mt8195_rt5682_init, - .ops = &mt8195_rt5682_etdm_ops, .be_hw_params_fixup = mt8195_etdm_hw_params_fixup, SND_SOC_DAILINK_REG(ETDM2_IN_BE), }, @@ -1249,7 +1246,6 @@ static struct snd_soc_dai_link mt8195_mt6359_dai_links[] = { SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBS_CFS, .dpcm_playback = 1, - .ops = &mt8195_rt5682_etdm_ops, .be_hw_params_fixup = mt8195_etdm_hw_params_fixup, SND_SOC_DAILINK_REG(ETDM1_OUT_BE), }, @@ -1381,7 +1377,7 @@ static int mt8195_mt6359_dev_probe(struct platform_device *pdev) struct snd_soc_dai_link *dai_link; struct mtk_soc_card_data *soc_card_data; struct mt8195_mt6359_priv *mach_priv; - struct device_node *platform_node, *adsp_node, *dp_node, *hdmi_node; + struct device_node *platform_node, *adsp_node, *codec_node, *dp_node, *hdmi_node; struct mt8195_card_data *card_data; int is5682s = 0; int init6359 = 0; @@ -1401,8 +1397,12 @@ static int mt8195_mt6359_dev_probe(struct platform_device *pdev) if (!card->name) card->name = card_data->name; - if (strstr(card->name, "_5682s")) + if (strstr(card->name, "_5682s")) { + codec_node = of_find_compatible_node(NULL, NULL, "realtek,rt5682s"); is5682s = 1; + } else + codec_node = of_find_compatible_node(NULL, NULL, "realtek,rt5682i"); + soc_card_data = devm_kzalloc(&pdev->dev, sizeof(*card_data), GFP_KERNEL); if (!soc_card_data) return -ENOMEM; @@ -1488,12 +1488,27 @@ static int mt8195_mt6359_dev_probe(struct platform_device *pdev) dai_link->codecs->dai_name = "i2s-hifi"; dai_link->init = mt8195_hdmi_codec_init; } - } else if (strcmp(dai_link->name, "ETDM1_OUT_BE") == 0 || - strcmp(dai_link->name, "ETDM2_IN_BE") == 0) { - dai_link->codecs->name = - is5682s ? RT5682S_DEV0_NAME : RT5682_DEV0_NAME; - dai_link->codecs->dai_name = - is5682s ? 
RT5682S_CODEC_DAI : RT5682_CODEC_DAI; + } else if (strcmp(dai_link->name, "ETDM1_OUT_BE") == 0) { + if (!codec_node) { + dev_err(&pdev->dev, "Codec not found!\n"); + } else { + dai_link->codecs->of_node = codec_node; + dai_link->codecs->name = NULL; + dai_link->codecs->dai_name = + is5682s ? RT5682S_CODEC_DAI : RT5682_CODEC_DAI; + dai_link->init = mt8195_rt5682_init; + dai_link->ops = &mt8195_rt5682_etdm_ops; + } + } else if (strcmp(dai_link->name, "ETDM2_IN_BE") == 0) { + if (!codec_node) { + dev_err(&pdev->dev, "Codec not found!\n"); + } else { + dai_link->codecs->of_node = codec_node; + dai_link->codecs->name = NULL; + dai_link->codecs->dai_name = + is5682s ? RT5682S_CODEC_DAI : RT5682_CODEC_DAI; + dai_link->ops = &mt8195_rt5682_etdm_ops; + } } else if (strcmp(dai_link->name, "DL_SRC_BE") == 0 || strcmp(dai_link->name, "UL_SRC1_BE") == 0 || strcmp(dai_link->name, "UL_SRC2_BE") == 0) { diff --git a/sound/soc/rockchip/rk3399_gru_sound.c b/sound/soc/rockchip/rk3399_gru_sound.c index 1a504ebd3a0e9c6f0da9ea434633cf82c331c60b..6c89c7331229f0ac86b29f3bffb05f6623c98067 100644 --- a/sound/soc/rockchip/rk3399_gru_sound.c +++ b/sound/soc/rockchip/rk3399_gru_sound.c @@ -446,7 +446,7 @@ static const struct rockchip_sound_route rockchip_routes[] = { struct dailink_match_data { const char *compatible; - struct bus_type *bus_type; + const struct bus_type *bus_type; }; static const struct dailink_match_data dailink_match[] = { diff --git a/sound/soc/sof/ipc3-dtrace.c b/sound/soc/sof/ipc3-dtrace.c index 93b189c2d2ee2f8886e8c1cbbc52100ea2c2be72..0dca139322f3d22a14c7f6d45a9a343090a9f2bb 100644 --- a/sound/soc/sof/ipc3-dtrace.c +++ b/sound/soc/sof/ipc3-dtrace.c @@ -137,7 +137,6 @@ static int trace_filter_parse(struct snd_sof_dev *sdev, char *string, dev_err(sdev->dev, "Parsing filter entry '%s' failed with %d\n", entry, entry_len); - kfree(*out); return -EINVAL; } } @@ -209,13 +208,13 @@ static ssize_t dfsentry_trace_filter_write(struct file *file, const char __user ret = 
ipc3_trace_update_filter(sdev, num_elems, elems); if (ret < 0) { dev_err(sdev->dev, "Filter update failed: %d\n", ret); - kfree(elems); goto error; } } ret = count; error: kfree(string); + kfree(elems); return ret; } diff --git a/sound/soc/sof/ipc4-loader.c b/sound/soc/sof/ipc4-loader.c index 3539b0a66e1beedb6a5e2841a999b97f39cf75c0..c79479afa8d0db70be1a756d09ff85a1a353bbc9 100644 --- a/sound/soc/sof/ipc4-loader.c +++ b/sound/soc/sof/ipc4-loader.c @@ -482,13 +482,10 @@ void sof_ipc4_update_cpc_from_manifest(struct snd_sof_dev *sdev, msg = "No CPC match in the firmware file's manifest"; no_cpc: - dev_warn(sdev->dev, "%s (UUID: %pUL): %s (ibs/obs: %u/%u)\n", - fw_module->man4_module_entry.name, - &fw_module->man4_module_entry.uuid, msg, basecfg->ibs, - basecfg->obs); - dev_warn_once(sdev->dev, "Please try to update the firmware.\n"); - dev_warn_once(sdev->dev, "If the issue persists, file a bug at\n"); - dev_warn_once(sdev->dev, "https://github.com/thesofproject/sof/issues/\n"); + dev_dbg(sdev->dev, "%s (UUID: %pUL): %s (ibs/obs: %u/%u)\n", + fw_module->man4_module_entry.name, + &fw_module->man4_module_entry.uuid, msg, basecfg->ibs, + basecfg->obs); } const struct sof_ipc_fw_loader_ops ipc4_loader_ops = { diff --git a/sound/soc/sof/ipc4-pcm.c b/sound/soc/sof/ipc4-pcm.c index 39039a647cca335aa362e5671ab0d51e6c0abcf1..85d3f390e4b290774687086f37b2a73473117e54 100644 --- a/sound/soc/sof/ipc4-pcm.c +++ b/sound/soc/sof/ipc4-pcm.c @@ -768,10 +768,8 @@ static void sof_ipc4_build_time_info(struct snd_sof_dev *sdev, struct snd_sof_pc info->llp_offset = offsetof(struct sof_ipc4_fw_registers, llp_evad_reading_slot) + sdev->fw_info_box.offset; sof_mailbox_read(sdev, info->llp_offset, &llp_slot, sizeof(llp_slot)); - if (llp_slot.node_id != dai_copier->data.gtw_cfg.node_id) { - dev_info(sdev->dev, "no llp found, fall back to default HDA path"); + if (llp_slot.node_id != dai_copier->data.gtw_cfg.node_id) info->llp_offset = 0; - } } static int sof_ipc4_pcm_hw_params(struct 
snd_soc_component *component, diff --git a/sound/usb/mixer_scarlett2.c b/sound/usb/mixer_scarlett2.c index 1de3ddc50eb6accdb267ab1e722caffe532d6df2..6de605a601e5f89ff7a9c12b36db81eed6d876c3 100644 --- a/sound/usb/mixer_scarlett2.c +++ b/sound/usb/mixer_scarlett2.c @@ -5361,9 +5361,9 @@ static int scarlett2_add_line_out_ctls(struct usb_mixer_interface *mixer) if (private->vol_sw_hw_switch[index]) scarlett2_vol_ctl_set_writable(mixer, i, 0); - snprintf(s, sizeof(s), - "Line Out %02d Volume Control Playback Enum", - i + 1); + scnprintf(s, sizeof(s), + "Line Out %02d Volume Control Playback Enum", + i + 1); err = scarlett2_add_new_ctl(mixer, &scarlett2_sw_hw_enum_ctl, i, 1, s, @@ -5406,8 +5406,8 @@ static int scarlett2_add_line_in_ctls(struct usb_mixer_interface *mixer) /* Add input level (line/inst) controls */ for (i = 0; i < info->level_input_count; i++) { - snprintf(s, sizeof(s), fmt, i + 1 + info->level_input_first, - "Level", "Enum"); + scnprintf(s, sizeof(s), fmt, i + 1 + info->level_input_first, + "Level", "Enum"); err = scarlett2_add_new_ctl(mixer, &scarlett2_level_enum_ctl, i, 1, s, &private->level_ctls[i]); if (err < 0) @@ -5416,7 +5416,7 @@ static int scarlett2_add_line_in_ctls(struct usb_mixer_interface *mixer) /* Add input pad controls */ for (i = 0; i < info->pad_input_count; i++) { - snprintf(s, sizeof(s), fmt, i + 1, "Pad", "Switch"); + scnprintf(s, sizeof(s), fmt, i + 1, "Pad", "Switch"); err = scarlett2_add_new_ctl(mixer, &scarlett2_pad_ctl, i, 1, s, &private->pad_ctls[i]); if (err < 0) @@ -5425,8 +5425,8 @@ static int scarlett2_add_line_in_ctls(struct usb_mixer_interface *mixer) /* Add input air controls */ for (i = 0; i < info->air_input_count; i++) { - snprintf(s, sizeof(s), fmt, i + 1 + info->air_input_first, - "Air", info->air_option ? "Enum" : "Switch"); + scnprintf(s, sizeof(s), fmt, i + 1 + info->air_input_first, + "Air", info->air_option ? 
"Enum" : "Switch"); err = scarlett2_add_new_ctl( mixer, &scarlett2_air_ctl[info->air_option], i, 1, s, &private->air_ctls[i]); @@ -5481,9 +5481,9 @@ static int scarlett2_add_line_in_ctls(struct usb_mixer_interface *mixer) for (i = 0; i < info->gain_input_count; i++) { if (i % 2) { - snprintf(s, sizeof(s), - "Line In %d-%d Link Capture Switch", - i, i + 1); + scnprintf(s, sizeof(s), + "Line In %d-%d Link Capture Switch", + i, i + 1); err = scarlett2_add_new_ctl( mixer, &scarlett2_input_link_ctl, i / 2, 1, s, @@ -5492,30 +5492,30 @@ static int scarlett2_add_line_in_ctls(struct usb_mixer_interface *mixer) return err; } - snprintf(s, sizeof(s), fmt, i + 1, - "Gain", "Volume"); + scnprintf(s, sizeof(s), fmt, i + 1, + "Gain", "Volume"); err = scarlett2_add_new_ctl( mixer, &scarlett2_input_gain_ctl, i, 1, s, &private->input_gain_ctls[i]); if (err < 0) return err; - snprintf(s, sizeof(s), fmt, i + 1, - "Autogain", "Switch"); + scnprintf(s, sizeof(s), fmt, i + 1, + "Autogain", "Switch"); err = scarlett2_add_new_ctl( mixer, &scarlett2_autogain_switch_ctl, i, 1, s, &private->autogain_ctls[i]); if (err < 0) return err; - snprintf(s, sizeof(s), fmt, i + 1, - "Autogain Status", "Enum"); + scnprintf(s, sizeof(s), fmt, i + 1, + "Autogain Status", "Enum"); err = scarlett2_add_new_ctl( mixer, &scarlett2_autogain_status_ctl, i, 1, s, &private->autogain_status_ctls[i]); - snprintf(s, sizeof(s), fmt, i + 1, - "Safe", "Switch"); + scnprintf(s, sizeof(s), fmt, i + 1, + "Safe", "Switch"); err = scarlett2_add_new_ctl( mixer, &scarlett2_safe_ctl, i, 1, s, &private->safe_ctls[i]); @@ -5902,8 +5902,8 @@ static int scarlett2_add_direct_monitor_ctls(struct usb_mixer_interface *mixer) for (k = 0; k < private->num_mix_in; k++, index++) { char name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN]; - snprintf(name, sizeof(name), format, - mix_type, 'A' + j, k + 1); + scnprintf(name, sizeof(name), format, + mix_type, 'A' + j, k + 1); err = scarlett2_add_new_ctl( mixer, &scarlett2_monitor_mix_ctl, diff --git 
a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 934e2777a2dbcd9062f873c039f1853867937ed3..64df118376df66d6ddeb4895054eb12c7c40942b 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -32,6 +32,7 @@ FEATURE_TESTS_BASIC := \ backtrace \ dwarf \ dwarf_getlocations \ + dwarf_getcfi \ eventfd \ fortify-source \ get_current_dir_name \ diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index dad79ede4e0ae0030ee401a1daf5d59ff871dcb6..37722e509eb9f1924380e65542b55937f3e0cc9e 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -7,6 +7,7 @@ FILES= \ test-bionic.bin \ test-dwarf.bin \ test-dwarf_getlocations.bin \ + test-dwarf_getcfi.bin \ test-eventfd.bin \ test-fortify-source.bin \ test-get_current_dir_name.bin \ @@ -154,6 +155,9 @@ $(OUTPUT)test-dwarf.bin: $(OUTPUT)test-dwarf_getlocations.bin: $(BUILD) $(DWARFLIBS) +$(OUTPUT)test-dwarf_getcfi.bin: + $(BUILD) $(DWARFLIBS) + $(OUTPUT)test-libelf-getphdrnum.bin: $(BUILD) -lelf diff --git a/tools/build/feature/test-dwarf_getcfi.c b/tools/build/feature/test-dwarf_getcfi.c new file mode 100644 index 0000000000000000000000000000000000000000..50e7d7cb7bdf94fd9527c58997f44f52a4190a23 --- /dev/null +++ b/tools/build/feature/test-dwarf_getcfi.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include + +int main(void) +{ + Dwarf *dwarf = NULL; + return dwarf_getcfi(dwarf) == NULL; +} diff --git a/tools/build/feature/test-libopencsd.c b/tools/build/feature/test-libopencsd.c index eb6303ff446ed93aadaeaf1ee553515eb8f39a05..4cfcef9da3e434955396dc5ccd8ccdfd1e063ed7 100644 --- a/tools/build/feature/test-libopencsd.c +++ b/tools/build/feature/test-libopencsd.c @@ -4,9 +4,9 @@ /* * Check OpenCSD library version is sufficient to provide required features */ -#define OCSD_MIN_VER ((1 << 16) | (1 << 8) | (1)) +#define OCSD_MIN_VER ((1 << 16) | (2 << 8) | (1)) #if !defined(OCSD_VER_NUM) || (OCSD_VER_NUM < OCSD_MIN_VER) -#error 
"OpenCSD >= 1.1.1 is required" +#error "OpenCSD >= 1.2.1 is required" #endif int main(void) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 39c6a250dd1b92af18e3b4a72a047d2784f89382..3a64499b0f5d63734d632ab03cd1966211473d8c 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -204,6 +204,8 @@ enum perf_branch_sample_type_shift { PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT = 18, /* save privilege mode */ + PERF_SAMPLE_BRANCH_COUNTERS_SHIFT = 19, /* save occurrences of events on a branch */ + PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */ }; @@ -235,6 +237,8 @@ enum perf_branch_sample_type { PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT, + PERF_SAMPLE_BRANCH_COUNTERS = 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT, + PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, }; @@ -982,6 +986,12 @@ enum perf_event_type { * { u64 nr; * { u64 hw_idx; } && PERF_SAMPLE_BRANCH_HW_INDEX * { u64 from, to, flags } lbr[nr]; + * # + * # The format of the counters is decided by the + * # "branch_counter_nr" and "branch_counter_width", + * # which are defined in the ABI. 
+ * # + * { u64 counters; } cntr[nr] && PERF_SAMPLE_BRANCH_COUNTERS * } && PERF_SAMPLE_BRANCH_STACK * * { u64 abi; # enum perf_sample_regs_abi @@ -1427,6 +1437,9 @@ struct perf_branch_entry { reserved:31; }; +/* Size of used info bits in struct perf_branch_entry */ +#define PERF_BRANCH_ENTRY_INFO_BITS_MAX 33 + union perf_sample_weight { __u64 full; #if defined(__LITTLE_ENDIAN_BITFIELD) diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c index 5cb0eeec2c8a6c4353ea82885250ab123c37d797..337fde770e45fe031c8fbb399529bb385d15ac76 100644 --- a/tools/lib/api/fs/fs.c +++ b/tools/lib/api/fs/fs.c @@ -16,6 +16,7 @@ #include #include "fs.h" +#include "../io.h" #include "debug-internal.h" #define _STR(x) #x @@ -344,53 +345,24 @@ int filename__read_ull(const char *filename, unsigned long long *value) return filename__read_ull_base(filename, value, 0); } -#define STRERR_BUFSIZE 128 /* For the buffer size of strerror_r */ - int filename__read_str(const char *filename, char **buf, size_t *sizep) { - size_t size = 0, alloc_size = 0; - void *bf = NULL, *nbf; - int fd, n, err = 0; - char sbuf[STRERR_BUFSIZE]; + struct io io; + char bf[128]; + int err; - fd = open(filename, O_RDONLY); - if (fd < 0) + io.fd = open(filename, O_RDONLY); + if (io.fd < 0) return -errno; - - do { - if (size == alloc_size) { - alloc_size += BUFSIZ; - nbf = realloc(bf, alloc_size); - if (!nbf) { - err = -ENOMEM; - break; - } - - bf = nbf; - } - - n = read(fd, bf + size, alloc_size - size); - if (n < 0) { - if (size) { - pr_warn("read failed %d: %s\n", errno, - strerror_r(errno, sbuf, sizeof(sbuf))); - err = 0; - } else - err = -errno; - - break; - } - - size += n; - } while (n > 0); - - if (!err) { - *sizep = size; - *buf = bf; + io__init(&io, io.fd, bf, sizeof(bf)); + *buf = NULL; + err = io__getdelim(&io, buf, sizep, /*delim=*/-1); + if (err < 0) { + free(*buf); + *buf = NULL; } else - free(bf); - - close(fd); + err = 0; + close(io.fd); return err; } @@ -475,15 +447,22 @@ int sysfs__read_str(const char 
*entry, char **buf, size_t *sizep) int sysfs__read_bool(const char *entry, bool *value) { - char *buf; - size_t size; - int ret; + struct io io; + char bf[16]; + int ret = 0; + char path[PATH_MAX]; + const char *sysfs = sysfs__mountpoint(); + + if (!sysfs) + return -1; - ret = sysfs__read_str(entry, &buf, &size); - if (ret < 0) - return ret; + snprintf(path, sizeof(path), "%s/%s", sysfs, entry); + io.fd = open(path, O_RDONLY); + if (io.fd < 0) + return -errno; - switch (buf[0]) { + io__init(&io, io.fd, bf, sizeof(bf)); + switch (io__get_char(&io)) { case '1': case 'y': case 'Y': @@ -497,8 +476,7 @@ int sysfs__read_bool(const char *entry, bool *value) default: ret = -1; } - - free(buf); + close(io.fd); return ret; } diff --git a/tools/lib/api/io.h b/tools/lib/api/io.h index a77b74c5fb655a8c7b65c293ba0dd563f34f8569..84adf81020185171b0d839eb639ba523d4d75355 100644 --- a/tools/lib/api/io.h +++ b/tools/lib/api/io.h @@ -12,6 +12,7 @@ #include #include #include +#include struct io { /* File descriptor being read/ */ @@ -140,8 +141,8 @@ static inline int io__get_dec(struct io *io, __u64 *dec) } } -/* Read up to and including the first newline following the pattern of getline. */ -static inline ssize_t io__getline(struct io *io, char **line_out, size_t *line_len_out) +/* Read up to and including the first delim. */ +static inline ssize_t io__getdelim(struct io *io, char **line_out, size_t *line_len_out, int delim) { char buf[128]; int buf_pos = 0; @@ -151,7 +152,7 @@ static inline ssize_t io__getline(struct io *io, char **line_out, size_t *line_l /* TODO: reuse previously allocated memory. 
*/ free(*line_out); - while (ch != '\n') { + while (ch != delim) { ch = io__get_char(io); if (ch < 0) @@ -184,4 +185,9 @@ err_out: return -ENOMEM; } +static inline ssize_t io__getline(struct io *io, char **line_out, size_t *line_len_out) +{ + return io__getdelim(io, line_out, line_len_out, /*delim=*/'\n'); +} + #endif /* __API_IO__ */ diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 82b7b2034d8a26cdb82020e6ccef6fb5baf22ec6..c5cdafb0eb49491bdf40f4d614af9f1b83cfb7cb 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -6704,6 +6704,67 @@ static struct { /* all other program types don't have "named" context structs */ }; +static bool need_func_arg_type_fixup(const struct btf *btf, const struct bpf_program *prog, + const char *subprog_name, int arg_idx, + int arg_type_id, const char *ctx_name) +{ + const struct btf_type *t; + const char *tname; + + /* check if existing parameter already matches verifier expectations */ + t = skip_mods_and_typedefs(btf, arg_type_id, NULL); + if (!btf_is_ptr(t)) + goto out_warn; + + /* typedef bpf_user_pt_regs_t is a special PITA case, valid for kprobe + * and perf_event programs, so check this case early on and forget + * about it for subsequent checks + */ + while (btf_is_mod(t)) + t = btf__type_by_id(btf, t->type); + if (btf_is_typedef(t) && + (prog->type == BPF_PROG_TYPE_KPROBE || prog->type == BPF_PROG_TYPE_PERF_EVENT)) { + tname = btf__str_by_offset(btf, t->name_off) ?: ""; + if (strcmp(tname, "bpf_user_pt_regs_t") == 0) + return false; /* canonical type for kprobe/perf_event */ + } + + /* now we can ignore typedefs moving forward */ + t = skip_mods_and_typedefs(btf, t->type, NULL); + + /* if it's `void *`, definitely fix up BTF info */ + if (btf_is_void(t)) + return true; + + /* if it's already proper canonical type, no need to fix up */ + tname = btf__str_by_offset(btf, t->name_off) ?: ""; + if (btf_is_struct(t) && strcmp(tname, ctx_name) == 0) + return false; + + /* special cases */ + switch 
(prog->type) { + case BPF_PROG_TYPE_KPROBE: + case BPF_PROG_TYPE_PERF_EVENT: + /* `struct pt_regs *` is expected, but we need to fix up */ + if (btf_is_struct(t) && strcmp(tname, "pt_regs") == 0) + return true; + break; + case BPF_PROG_TYPE_RAW_TRACEPOINT: + case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: + /* allow u64* as ctx */ + if (btf_is_int(t) && t->size == 8) + return true; + break; + default: + break; + } + +out_warn: + pr_warn("prog '%s': subprog '%s' arg#%d is expected to be of `struct %s *` type\n", + prog->name, subprog_name, arg_idx, ctx_name); + return false; +} + static int clone_func_btf_info(struct btf *btf, int orig_fn_id, struct bpf_program *prog) { int fn_id, fn_proto_id, ret_type_id, orig_proto_id; @@ -6766,6 +6827,69 @@ static int clone_func_btf_info(struct btf *btf, int orig_fn_id, struct bpf_progr return fn_id; } +static int probe_kern_arg_ctx_tag(void) +{ + /* To minimize merge conflicts with BPF token series that refactors + * feature detection code a lot, we don't integrate + * probe_kern_arg_ctx_tag() into kernel_supports() feature-detection + * framework yet, doing our own caching internally. + * This will be cleaned up a bit later when bpf/bpf-next trees settle. 
+ */ + static int cached_result = -1; + static const char strs[] = "\0a\0b\0arg:ctx\0"; + const __u32 types[] = { + /* [1] INT */ + BTF_TYPE_INT_ENC(1 /* "a" */, BTF_INT_SIGNED, 0, 32, 4), + /* [2] PTR -> VOID */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0), + /* [3] FUNC_PROTO `int(void *a)` */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 1), + BTF_PARAM_ENC(1 /* "a" */, 2), + /* [4] FUNC 'a' -> FUNC_PROTO (main prog) */ + BTF_TYPE_ENC(1 /* "a" */, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 3), + /* [5] FUNC_PROTO `int(void *b __arg_ctx)` */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 1), + BTF_PARAM_ENC(3 /* "b" */, 2), + /* [6] FUNC 'b' -> FUNC_PROTO (subprog) */ + BTF_TYPE_ENC(3 /* "b" */, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 5), + /* [7] DECL_TAG 'arg:ctx' -> func 'b' arg 'b' */ + BTF_TYPE_DECL_TAG_ENC(5 /* "arg:ctx" */, 6, 0), + }; + const struct bpf_insn insns[] = { + /* main prog */ + BPF_CALL_REL(+1), + BPF_EXIT_INSN(), + /* global subprog */ + BPF_EMIT_CALL(BPF_FUNC_get_func_ip), /* needs PTR_TO_CTX */ + BPF_EXIT_INSN(), + }; + const struct bpf_func_info_min func_infos[] = { + { 0, 4 }, /* main prog -> FUNC 'a' */ + { 2, 6 }, /* subprog -> FUNC 'b' */ + }; + LIBBPF_OPTS(bpf_prog_load_opts, opts); + int prog_fd, btf_fd, insn_cnt = ARRAY_SIZE(insns); + + if (cached_result >= 0) + return cached_result; + + btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs)); + if (btf_fd < 0) + return 0; + + opts.prog_btf_fd = btf_fd; + opts.func_info = &func_infos; + opts.func_info_cnt = ARRAY_SIZE(func_infos); + opts.func_info_rec_size = sizeof(func_infos[0]); + + prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, "det_arg_ctx", + "GPL", insns, insn_cnt, &opts); + close(btf_fd); + + cached_result = probe_fd(prog_fd); + return cached_result; +} + /* Check if main program or global subprog's function prototype has `arg:ctx` * argument tags, and, if necessary, substitute correct type to match what 
BPF * verifier would expect, taking into account specific program type. This @@ -6775,7 +6899,7 @@ static int clone_func_btf_info(struct btf *btf, int orig_fn_id, struct bpf_progr */ static int bpf_program_fixup_func_info(struct bpf_object *obj, struct bpf_program *prog) { - const char *ctx_name = NULL, *ctx_tag = "arg:ctx"; + const char *ctx_name = NULL, *ctx_tag = "arg:ctx", *fn_name; struct bpf_func_info_min *func_rec; struct btf_type *fn_t, *fn_proto_t; struct btf *btf = obj->btf; @@ -6789,6 +6913,10 @@ static int bpf_program_fixup_func_info(struct bpf_object *obj, struct bpf_progra if (!obj->btf_ext || !prog->func_info) return 0; + /* don't do any fix ups if kernel natively supports __arg_ctx */ + if (probe_kern_arg_ctx_tag() > 0) + return 0; + /* some BPF program types just don't have named context structs, so * this fallback mechanism doesn't work for them */ @@ -6851,15 +6979,11 @@ static int bpf_program_fixup_func_info(struct bpf_object *obj, struct bpf_progra if (arg_idx < 0 || arg_idx >= arg_cnt) continue; - /* check if existing parameter already matches verifier expectations */ + /* check if we should fix up argument type */ p = &btf_params(fn_proto_t)[arg_idx]; - t = skip_mods_and_typedefs(btf, p->type, NULL); - if (btf_is_ptr(t) && - (t = skip_mods_and_typedefs(btf, t->type, NULL)) && - btf_is_struct(t) && - strcmp(btf__str_by_offset(btf, t->name_off), ctx_name) == 0) { - continue; /* no need for fix up */ - } + fn_name = btf__str_by_offset(btf, fn_t->name_off) ?: ""; + if (!need_func_arg_type_fixup(btf, prog, fn_name, arg_idx, p->type, ctx_name)) + continue; /* clone fn/fn_proto, unless we already did it for another arg */ if (func_rec->type_id == orig_fn_id) { diff --git a/tools/lib/perf/Documentation/examples/sampling.c b/tools/lib/perf/Documentation/examples/sampling.c index 8e1a926a9cfe6ec3631f9b82d758e870d4dab6ef..bc142f0664b5a6a127152bbb48068290dc949dcb 100644 --- a/tools/lib/perf/Documentation/examples/sampling.c +++ 
b/tools/lib/perf/Documentation/examples/sampling.c @@ -39,7 +39,7 @@ int main(int argc, char **argv) libperf_init(libperf_print); - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); if (!cpus) { fprintf(stderr, "failed to create cpus\n"); return -1; diff --git a/tools/lib/perf/Documentation/libperf-sampling.txt b/tools/lib/perf/Documentation/libperf-sampling.txt index d6ca24f6ef78f421910614560fe3b6bb6ec45420..2378980fab8a6b263e44ed3c5bb0f7ae9ed35ce1 100644 --- a/tools/lib/perf/Documentation/libperf-sampling.txt +++ b/tools/lib/perf/Documentation/libperf-sampling.txt @@ -97,7 +97,7 @@ In this case we will monitor all the available CPUs: [source,c] -- - 42 cpus = perf_cpu_map__new(NULL); + 42 cpus = perf_cpu_map__new_online_cpus(); 43 if (!cpus) { 44 fprintf(stderr, "failed to create cpus\n"); 45 return -1; diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt index a8f1a237931b19b182d5a197f7ce6fe4ec019351..fcfb9499ef9cdfbdfa7903c13f32a060b49e19c2 100644 --- a/tools/lib/perf/Documentation/libperf.txt +++ b/tools/lib/perf/Documentation/libperf.txt @@ -37,7 +37,7 @@ SYNOPSIS struct perf_cpu_map; - struct perf_cpu_map *perf_cpu_map__dummy_new(void); + struct perf_cpu_map *perf_cpu_map__new_any_cpu(void); struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list); struct perf_cpu_map *perf_cpu_map__read(FILE *file); struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map); @@ -46,7 +46,7 @@ SYNOPSIS void perf_cpu_map__put(struct perf_cpu_map *map); int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx); int perf_cpu_map__nr(const struct perf_cpu_map *cpus); - bool perf_cpu_map__empty(const struct perf_cpu_map *map); + bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map); int perf_cpu_map__max(struct perf_cpu_map *map); bool perf_cpu_map__has(const struct perf_cpu_map *map, int cpu); diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c index 
2a5a292173740bc220c4b7ecd655242912882335..4adcd7920d033dfa5f99ec6c4f9bcb3d7a7d2d10 100644 --- a/tools/lib/perf/cpumap.c +++ b/tools/lib/perf/cpumap.c @@ -9,6 +9,7 @@ #include #include #include +#include "internal.h" void perf_cpu_map__set_nr(struct perf_cpu_map *map, int nr_cpus) { @@ -27,7 +28,7 @@ struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus) return result; } -struct perf_cpu_map *perf_cpu_map__dummy_new(void) +struct perf_cpu_map *perf_cpu_map__new_any_cpu(void) { struct perf_cpu_map *cpus = perf_cpu_map__alloc(1); @@ -66,15 +67,21 @@ void perf_cpu_map__put(struct perf_cpu_map *map) } } -static struct perf_cpu_map *cpu_map__default_new(void) +static struct perf_cpu_map *cpu_map__new_sysconf(void) { struct perf_cpu_map *cpus; - int nr_cpus; + int nr_cpus, nr_cpus_conf; nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); if (nr_cpus < 0) return NULL; + nr_cpus_conf = sysconf(_SC_NPROCESSORS_CONF); + if (nr_cpus != nr_cpus_conf) { + pr_warning("Number of online CPUs (%d) differs from the number configured (%d) the CPU map will only cover the first %d CPUs.", + nr_cpus, nr_cpus_conf, nr_cpus); + } + cpus = perf_cpu_map__alloc(nr_cpus); if (cpus != NULL) { int i; @@ -86,9 +93,27 @@ static struct perf_cpu_map *cpu_map__default_new(void) return cpus; } -struct perf_cpu_map *perf_cpu_map__default_new(void) +static struct perf_cpu_map *cpu_map__new_sysfs_online(void) { - return cpu_map__default_new(); + struct perf_cpu_map *cpus = NULL; + FILE *onlnf; + + onlnf = fopen("/sys/devices/system/cpu/online", "r"); + if (onlnf) { + cpus = perf_cpu_map__read(onlnf); + fclose(onlnf); + } + return cpus; +} + +struct perf_cpu_map *perf_cpu_map__new_online_cpus(void) +{ + struct perf_cpu_map *cpus = cpu_map__new_sysfs_online(); + + if (cpus) + return cpus; + + return cpu_map__new_sysconf(); } @@ -180,27 +205,11 @@ struct perf_cpu_map *perf_cpu_map__read(FILE *file) if (nr_cpus > 0) cpus = cpu_map__trim_new(nr_cpus, tmp_cpus); - else - cpus = cpu_map__default_new(); out_free_tmp: 
free(tmp_cpus); return cpus; } -static struct perf_cpu_map *cpu_map__read_all_cpu_map(void) -{ - struct perf_cpu_map *cpus = NULL; - FILE *onlnf; - - onlnf = fopen("/sys/devices/system/cpu/online", "r"); - if (!onlnf) - return cpu_map__default_new(); - - cpus = perf_cpu_map__read(onlnf); - fclose(onlnf); - return cpus; -} - struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list) { struct perf_cpu_map *cpus = NULL; @@ -211,7 +220,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list) int max_entries = 0; if (!cpu_list) - return cpu_map__read_all_cpu_map(); + return perf_cpu_map__new_online_cpus(); /* * must handle the case of empty cpumap to cover @@ -268,10 +277,12 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list) if (nr_cpus > 0) cpus = cpu_map__trim_new(nr_cpus, tmp_cpus); - else if (*cpu_list != '\0') - cpus = cpu_map__default_new(); - else - cpus = perf_cpu_map__dummy_new(); + else if (*cpu_list != '\0') { + pr_warning("Unexpected characters at end of cpu list ('%s'), using online CPUs.", + cpu_list); + cpus = perf_cpu_map__new_online_cpus(); + } else + cpus = perf_cpu_map__new_any_cpu(); invalid: free(tmp_cpus); out: @@ -300,7 +311,7 @@ int perf_cpu_map__nr(const struct perf_cpu_map *cpus) return cpus ? __perf_cpu_map__nr(cpus) : 1; } -bool perf_cpu_map__empty(const struct perf_cpu_map *map) +bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map) { return map ? __perf_cpu_map__cpu(map, 0).cpu == -1 : true; } diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index 3acbbccc19019c4baa11f82ea253daacd73f7603..058e3ff10f9b2849fd25b35164472ec9f949adca 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -39,7 +39,7 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, if (evsel->system_wide) { /* System wide: set the cpu map of the evsel to all online CPUs. 
*/ perf_cpu_map__put(evsel->cpus); - evsel->cpus = perf_cpu_map__new(NULL); + evsel->cpus = perf_cpu_map__new_online_cpus(); } else if (evlist->has_user_cpus && evsel->is_pmu_core) { /* * User requested CPUs on a core PMU, ensure the requested CPUs @@ -619,7 +619,7 @@ static int perf_evlist__nr_mmaps(struct perf_evlist *evlist) /* One for each CPU */ nr_mmaps = perf_cpu_map__nr(evlist->all_cpus); - if (perf_cpu_map__empty(evlist->all_cpus)) { + if (perf_cpu_map__has_any_cpu_or_is_empty(evlist->all_cpus)) { /* Plus one for each thread */ nr_mmaps += perf_thread_map__nr(evlist->threads); /* Minus the per-thread CPU (-1) */ @@ -653,7 +653,7 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist, if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0) return -ENOMEM; - if (perf_cpu_map__empty(cpus)) + if (perf_cpu_map__has_any_cpu_or_is_empty(cpus)) return mmap_per_thread(evlist, ops, mp); return mmap_per_cpu(evlist, ops, mp); diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c index 8b51b008a81f142129069bc351c86e6aa2804ed8..c07160953224adf7e7b291f6a47ab33743d20b94 100644 --- a/tools/lib/perf/evsel.c +++ b/tools/lib/perf/evsel.c @@ -120,7 +120,7 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, static struct perf_cpu_map *empty_cpu_map; if (empty_cpu_map == NULL) { - empty_cpu_map = perf_cpu_map__dummy_new(); + empty_cpu_map = perf_cpu_map__new_any_cpu(); if (empty_cpu_map == NULL) return -ENOMEM; } diff --git a/tools/lib/perf/include/internal/mmap.h b/tools/lib/perf/include/internal/mmap.h index 5a062af8e9d8e2200aaecc3e110b7a750c2f78e7..5f08cab61ecec6d25cd1a80e4a1c5bd83a0d12e3 100644 --- a/tools/lib/perf/include/internal/mmap.h +++ b/tools/lib/perf/include/internal/mmap.h @@ -33,7 +33,8 @@ struct perf_mmap { bool overwrite; u64 flush; libperf_unmap_cb_t unmap_cb; - char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); + void *event_copy; + size_t event_copy_sz; struct perf_mmap *next; }; diff --git 
a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h index e38d859a384d2c32ef770bd18fa68798fc151bbb..228c6c629b0ce16558b880a3b8989207159b9575 100644 --- a/tools/lib/perf/include/perf/cpumap.h +++ b/tools/lib/perf/include/perf/cpumap.h @@ -19,10 +19,23 @@ struct perf_cache { struct perf_cpu_map; /** - * perf_cpu_map__dummy_new - a map with a singular "any CPU"/dummy -1 value. + * perf_cpu_map__new_any_cpu - a map with a singular "any CPU"/dummy -1 value. + */ +LIBPERF_API struct perf_cpu_map *perf_cpu_map__new_any_cpu(void); +/** + * perf_cpu_map__new_online_cpus - a map read from + * /sys/devices/system/cpu/online if + * available. If reading wasn't possible a map + * is created using the online processors + * assuming the first 'n' processors are all + * online. + */ +LIBPERF_API struct perf_cpu_map *perf_cpu_map__new_online_cpus(void); +/** + * perf_cpu_map__new - create a map from the given cpu_list such as "0-7". If no + * cpu_list argument is provided then + * perf_cpu_map__new_online_cpus is returned. */ -LIBPERF_API struct perf_cpu_map *perf_cpu_map__dummy_new(void); -LIBPERF_API struct perf_cpu_map *perf_cpu_map__default_new(void); LIBPERF_API struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list); LIBPERF_API struct perf_cpu_map *perf_cpu_map__read(FILE *file); LIBPERF_API struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map); @@ -31,12 +44,23 @@ LIBPERF_API struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig, LIBPERF_API struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig, struct perf_cpu_map *other); LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map); +/** + * perf_cpu_map__cpu - get the CPU value at the given index. Returns -1 if index + * is invalid. 
+ */ LIBPERF_API struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx); +/** + * perf_cpu_map__nr - for an empty map returns 1, as perf_cpu_map__cpu returns a + * cpu of -1 for an invalid index, this makes an empty map + * look like it contains the "any CPU"/dummy value. Otherwise + * the result is the number CPUs in the map plus one if the + * "any CPU"/dummy value is present. + */ LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus); /** - * perf_cpu_map__empty - is map either empty or the "any CPU"/dummy value. + * perf_cpu_map__has_any_cpu_or_is_empty - is map either empty or has the "any CPU"/dummy value. */ -LIBPERF_API bool perf_cpu_map__empty(const struct perf_cpu_map *map); +LIBPERF_API bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map); LIBPERF_API struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map); LIBPERF_API bool perf_cpu_map__has(const struct perf_cpu_map *map, struct perf_cpu cpu); LIBPERF_API bool perf_cpu_map__equal(const struct perf_cpu_map *lhs, @@ -51,6 +75,12 @@ LIBPERF_API bool perf_cpu_map__has_any_cpu(const struct perf_cpu_map *map); (idx) < perf_cpu_map__nr(cpus); \ (idx)++, (cpu) = perf_cpu_map__cpu(cpus, idx)) +#define perf_cpu_map__for_each_cpu_skip_any(_cpu, idx, cpus) \ + for ((idx) = 0, (_cpu) = perf_cpu_map__cpu(cpus, idx); \ + (idx) < perf_cpu_map__nr(cpus); \ + (idx)++, (_cpu) = perf_cpu_map__cpu(cpus, idx)) \ + if ((_cpu).cpu != -1) + #define perf_cpu_map__for_each_idx(idx, cpus) \ for ((idx) = 0; (idx) < perf_cpu_map__nr(cpus); (idx)++) diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map index 190b56ae923addf23446aedbe4215c124bc4cb53..10b3f372264264ff35e77e529ac3387836ba09e0 100644 --- a/tools/lib/perf/libperf.map +++ b/tools/lib/perf/libperf.map @@ -1,15 +1,15 @@ LIBPERF_0.0.1 { global: libperf_init; - perf_cpu_map__dummy_new; - perf_cpu_map__default_new; + perf_cpu_map__new_any_cpu; + perf_cpu_map__new_online_cpus; perf_cpu_map__get; 
perf_cpu_map__put; perf_cpu_map__new; perf_cpu_map__read; perf_cpu_map__nr; perf_cpu_map__cpu; - perf_cpu_map__empty; + perf_cpu_map__has_any_cpu_or_is_empty; perf_cpu_map__max; perf_cpu_map__has; perf_thread_map__new_array; diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c index 2184814b37dd393e3a6a0fb10c6ccb4db99df042..0c903c2372c97850ab7d3fddea63ddd67bfd40c9 100644 --- a/tools/lib/perf/mmap.c +++ b/tools/lib/perf/mmap.c @@ -19,6 +19,7 @@ void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev, bool overwrite, libperf_unmap_cb_t unmap_cb) { + /* Assume fields were zero initialized. */ map->fd = -1; map->overwrite = overwrite; map->unmap_cb = unmap_cb; @@ -51,13 +52,18 @@ int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp, void perf_mmap__munmap(struct perf_mmap *map) { - if (map && map->base != NULL) { + if (!map) + return; + + zfree(&map->event_copy); + map->event_copy_sz = 0; + if (map->base) { munmap(map->base, perf_mmap__mmap_len(map)); map->base = NULL; map->fd = -1; refcount_set(&map->refcnt, 0); } - if (map && map->unmap_cb) + if (map->unmap_cb) map->unmap_cb(map); } @@ -223,9 +229,17 @@ static union perf_event *perf_mmap__read(struct perf_mmap *map, */ if ((*startp & map->mask) + size != ((*startp + size) & map->mask)) { unsigned int offset = *startp; - unsigned int len = min(sizeof(*event), size), cpy; + unsigned int len = size, cpy; void *dst = map->event_copy; + if (size > map->event_copy_sz) { + dst = realloc(map->event_copy, size); + if (!dst) + return NULL; + map->event_copy = dst; + map->event_copy_sz = size; + } + do { cpy = min(map->mask + 1 - (offset & map->mask), len); memcpy(dst, &data[offset & map->mask], cpy); diff --git a/tools/lib/perf/tests/test-cpumap.c b/tools/lib/perf/tests/test-cpumap.c index 87b0510a556ff3215c9df8f951db65d15accb990..c998b1dae86313d58134cf228552b66cf1a06e65 100644 --- a/tools/lib/perf/tests/test-cpumap.c +++ b/tools/lib/perf/tests/test-cpumap.c @@ -21,7 +21,7 @@ int 
test_cpumap(int argc, char **argv) libperf_init(libperf_print); - cpus = perf_cpu_map__dummy_new(); + cpus = perf_cpu_map__new_any_cpu(); if (!cpus) return -1; @@ -29,7 +29,7 @@ int test_cpumap(int argc, char **argv) perf_cpu_map__put(cpus); perf_cpu_map__put(cpus); - cpus = perf_cpu_map__default_new(); + cpus = perf_cpu_map__new_online_cpus(); if (!cpus) return -1; diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c index ed616fc19b4f2f82061cee202483a85eb0a51832..10f70cb41ff1debbb870d3acc2cb71683bcee7f6 100644 --- a/tools/lib/perf/tests/test-evlist.c +++ b/tools/lib/perf/tests/test-evlist.c @@ -46,7 +46,7 @@ static int test_stat_cpu(void) }; int err, idx; - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); __T("failed to create cpus", cpus); evlist = perf_evlist__new(); @@ -261,7 +261,7 @@ static int test_mmap_thread(void) threads = perf_thread_map__new_dummy(); __T("failed to create threads", threads); - cpus = perf_cpu_map__dummy_new(); + cpus = perf_cpu_map__new_any_cpu(); __T("failed to create cpus", cpus); perf_thread_map__set_pid(threads, 0, pid); @@ -350,7 +350,7 @@ static int test_mmap_cpus(void) attr.config = id; - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); __T("failed to create cpus", cpus); evlist = perf_evlist__new(); diff --git a/tools/lib/perf/tests/test-evsel.c b/tools/lib/perf/tests/test-evsel.c index a11fc51bfb688304e764f9166ebeddb11c902938..545ec31505466647b9ec182f79534fa713df4a57 100644 --- a/tools/lib/perf/tests/test-evsel.c +++ b/tools/lib/perf/tests/test-evsel.c @@ -27,7 +27,7 @@ static int test_stat_cpu(void) }; int err, idx; - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); __T("failed to create cpus", cpus); evsel = perf_evsel__new(&attr); diff --git a/tools/lib/subcmd/help.c b/tools/lib/subcmd/help.c index adfbae27dc369d8a6dedbe01d2faca2b86f4594a..8561b0f01a2476908bd2bfd73dfd8677be959ef2 100644 --- a/tools/lib/subcmd/help.c 
+++ b/tools/lib/subcmd/help.c @@ -52,11 +52,21 @@ void uniq(struct cmdnames *cmds) if (!cmds->cnt) return; - for (i = j = 1; i < cmds->cnt; i++) - if (strcmp(cmds->names[i]->name, cmds->names[i-1]->name)) - cmds->names[j++] = cmds->names[i]; - + for (i = 1; i < cmds->cnt; i++) { + if (!strcmp(cmds->names[i]->name, cmds->names[i-1]->name)) + zfree(&cmds->names[i - 1]); + } + for (i = 0, j = 0; i < cmds->cnt; i++) { + if (cmds->names[i]) { + if (i == j) + j++; + else + cmds->names[j++] = cmds->names[i]; + } + } cmds->cnt = j; + while (j < i) + cmds->names[j++] = NULL; } void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes) diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index f533e76fb48002b7e94e9733eb8747cb1b11f05a..f5b81d439387a14f36954def542a78106a46ce23 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -39,6 +39,9 @@ trace/beauty/generated/ pmu-events/pmu-events.c pmu-events/jevents pmu-events/metric_test.log +tests/shell/*.shellcheck_log +tests/shell/coresight/*.shellcheck_log +tests/shell/lib/*.shellcheck_log feature/ libapi/ libbpf/ @@ -49,3 +52,4 @@ libtraceevent/ libtraceevent_plugins/ fixdep Documentation/doc.dep +python_ext_build/ diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt index a97f95825b14e8b77186587ad4f19bf2766d7e8c..19cc179be9a784708492f18a94f06752b33c49f6 100644 --- a/tools/perf/Documentation/itrace.txt +++ b/tools/perf/Documentation/itrace.txt @@ -25,6 +25,7 @@ q quicker (less detailed) decoding A approximate IPC Z prefer to ignore timestamps (so-called "timeless" decoding) + T use the timestamp trace as kernel time The default is all events i.e. 
the same as --itrace=iybxwpe, except for perf script where it is --itrace=ce diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt index fe168e8165c8d22dd51933da407c9388c685f252..b95524bea021eb2fccdfbb4bf49d465e574584b0 100644 --- a/tools/perf/Documentation/perf-annotate.txt +++ b/tools/perf/Documentation/perf-annotate.txt @@ -155,6 +155,17 @@ include::itrace.txt[] stdio or stdio2 (Default: 0). Note that this is about selection of functions to display, not about lines within the function. +--data-type[=TYPE_NAME]:: + Display data type annotation instead of code. It infers data type of + samples (if they are memory accessing instructions) using DWARF debug + information. It can take an optional argument of data type name. In + that case it'd show annotation for the type only, otherwise it'd show + all data types it finds. + +--type-stat:: + Show stats for the data type annotation. + + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-report[1] diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 0b4e79dbd3f689942d4e734eb4e48161634ae063..379f9d7a8ab11a029e602bde77d57a0ef7785f79 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -251,7 +251,8 @@ annotate.*:: addr2line binary to use for file names and line numbers. annotate.objdump:: - objdump binary to use for disassembly and annotations. + objdump binary to use for disassembly and annotations, + including in the 'perf test' command. annotate.disassembler_style:: Use this to change the default disassembler style to some other value @@ -722,7 +723,6 @@ session-.*:: Defines new record session for daemon. The value is record's command line without the 'record' keyword. 
- SEE ALSO -------- linkperf:perf[1] diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index d5f78e125efed157d1270ff256513fa31673f60f..1b90575ee3c84eb206f9291e8fd05d43c70b2f9c 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -81,11 +81,13 @@ For Intel systems precise event sampling is implemented with PEBS which supports up to precise-level 2, and precise level 3 for some special cases -On AMD systems it is implemented using IBS (up to precise-level 2). -The precise modifier works with event types 0x76 (cpu-cycles, CPU -clocks not halted) and 0xC1 (micro-ops retired). Both events map to -IBS execution sampling (IBS op) with the IBS Op Counter Control bit -(IbsOpCntCtl) set respectively (see the +On AMD systems it is implemented using IBS OP (up to precise-level 2). +Unlike Intel PEBS which provides levels of precision, AMD core pmu is +inherently non-precise and IBS is inherently precise. (i.e. ibs_op//, +ibs_op//p, ibs_op//pp and ibs_op//ppp are all same). The precise modifier +works with event types 0x76 (cpu-cycles, CPU clocks not halted) and 0xC1 +(micro-ops retired). Both events map to IBS execution sampling (IBS op) +with the IBS Op Counter Control bit (IbsOpCntCtl) set respectively (see the Core Complex (CCX) -> Processor x86 Core -> Instruction Based Sampling (IBS) section of the [AMD Processor Programming Reference (PPR)] relevant to the family, model and stepping of the processor being used). 
diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt index 503abcba1438038732cff8afcfab5012feed0fc5..f5938d616d75176cb7f42e5500fee4f03a4ebafc 100644 --- a/tools/perf/Documentation/perf-lock.txt +++ b/tools/perf/Documentation/perf-lock.txt @@ -119,7 +119,7 @@ INFO OPTIONS CONTENTION OPTIONS --------------- +------------------ -k:: --key=:: diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 1889f66addf2aa936bafea132aed55ab0908ff8d..6015fdd08fb63b679b56195b7734e7449a318851 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -445,6 +445,10 @@ following filters are defined: 4th-Gen Xeon+ server), the save branch type is unconditionally enabled when the taken branch stack sampling is enabled. - priv: save privilege state during sampling in case binary is not available later + - counter: save occurrences of the event since the last branch entry. Currently, the + feature is only supported by a newer CPU, e.g., Intel Sierra Forest and + later platforms. An error out is expected if it's used on the unsupported + kernel or CPUs. + The option requires at least one branch type among any, any_call, any_ret, ind_call, cond. diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index af068b4f1e5a696464ba2040b7b39a4831b32050..38f59ac064f7d4615daf5e1bba57a7045ba4c597 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -118,6 +118,9 @@ OPTIONS - retire_lat: On X86, this reports pipeline stall of this instruction compared to the previous instruction in cycles. And currently supported only on X86 - simd: Flags describing a SIMD operation. "e" for empty Arm SVE predicate. "p" for partial Arm SVE predicate + - type: Data type of sample memory access. + - typeoff: Offset in the data type of sample memory access. + - symoff: Offset in the symbol. 
By default, comm, dso and symbol keys are used. (i.e. --sort comm,dso,symbol) diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 8f789fa1242e0dfdeab8aee6268f996e4933a6ac..5af2e432b54fb51a5e5371cffdfd22d162e0c915 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -422,7 +422,34 @@ See perf list output for the possible metrics and metricgroups. -A:: --no-aggr:: -Do not aggregate counts across all monitored CPUs. +--no-merge:: +Do not aggregate/merge counts across monitored CPUs or PMUs. + +When multiple events are created from a single event specification, +stat will, by default, aggregate the event counts and show the result +in a single row. This option disables that behavior and shows the +individual events and counts. + +Multiple events are created from a single event specification when: + +1. PID monitoring isn't requested and the system has more than one + CPU. For example, a system with 8 SMT threads will have one event + opened on each thread and aggregation is performed across them. + +2. Prefix or glob wildcard matching is used for the PMU name. For + example, multiple memory controller PMUs may exist typically with a + suffix of _0, _1, etc. By default the event counts will all be + combined if the PMU is specified without the suffix such as + uncore_imc rather than uncore_imc_0. + +3. Aliases, which are listed immediately after the Kernel PMU events + by perf list, are used. + +--hybrid-merge:: +Merge core event counts from all core PMUs. In hybrid or big.LITTLE +systems by default each core PMU will report its count +separately. This option forces core PMU counts to be combined to give +a behavior closer to having a single CPU type in the system. --topdown:: Print top-down metrics supported by the CPU. This allows to determine @@ -475,29 +502,6 @@ highlight 'tma_frontend_bound'. 
This metric may be drilled into with Error out if the input is higher than the supported max level. ---no-merge:: -Do not merge results from same PMUs. - -When multiple events are created from a single event specification, -stat will, by default, aggregate the event counts and show the result -in a single row. This option disables that behavior and shows -the individual events and counts. - -Multiple events are created from a single event specification when: -1. Prefix or glob matching is used for the PMU name. -2. Aliases, which are listed immediately after the Kernel PMU events - by perf list, are used. - ---hybrid-merge:: -Merge the hybrid event counts from all PMUs. - -For hybrid events, by default, the stat aggregates and reports the event -counts per PMU. But sometimes, it's also useful to aggregate event counts -from all PMUs. This option enables that behavior and reports the counts -without PMUs. - -For non-hybrid events, it should be no effect. - --smi-cost:: Measure SMI cost if msr/aperf/ and msr/smi/ events are supported. diff --git a/tools/perf/Documentation/perf.txt b/tools/perf/Documentation/perf.txt index ba3df49c169d329223f6d03fb6cab122e737b757..a7cf7bc2f9689dcdfea6cbe8daaaf01205e76be3 100644 --- a/tools/perf/Documentation/perf.txt +++ b/tools/perf/Documentation/perf.txt @@ -64,6 +64,9 @@ OPTIONS perf-event-open - Print perf_event_open() arguments and return value +--debug-file:: + Write debug output to a specified file. 
+ DESCRIPTION ----------- Performance counters for Linux are a new kernel-based subsystem diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index b3e6ed10f40c6f6c57578a8c99365dffb53ca94a..aa55850fbc213b939df67bb1df68f776ca555006 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -476,6 +476,11 @@ else else CFLAGS += -DHAVE_DWARF_GETLOCATIONS_SUPPORT endif # dwarf_getlocations + ifneq ($(feature-dwarf_getcfi), 1) + msg := $(warning Old libdw.h, finding variables at given 'perf probe' point will not work, install elfutils-devel/libdw-dev >= 0.142); + else + CFLAGS += -DHAVE_DWARF_CFI_SUPPORT + endif # dwarf_getcfi endif # Dwarf support endif # libelf support endif # NO_LIBELF @@ -680,15 +685,15 @@ ifndef BUILD_BPF_SKEL endif ifeq ($(BUILD_BPF_SKEL),1) - ifeq ($(filter -DHAVE_LIBBPF_SUPPORT, $(CFLAGS)),) - dummy := $(warning Warning: Disabled BPF skeletons as libbpf is required) - BUILD_BPF_SKEL := 0 - else ifeq ($(filter -DHAVE_LIBELF_SUPPORT, $(CFLAGS)),) + ifeq ($(filter -DHAVE_LIBELF_SUPPORT, $(CFLAGS)),) dummy := $(warning Warning: Disabled BPF skeletons as libelf is required by bpftool) BUILD_BPF_SKEL := 0 else ifeq ($(filter -DHAVE_ZLIB_SUPPORT, $(CFLAGS)),) dummy := $(warning Warning: Disabled BPF skeletons as zlib is required by bpftool) BUILD_BPF_SKEL := 0 + else ifeq ($(filter -DHAVE_LIBBPF_SUPPORT, $(CFLAGS)),) + dummy := $(warning Warning: Disabled BPF skeletons as libbpf is required) + BUILD_BPF_SKEL := 0 else ifeq ($(call get-executable,$(CLANG)),) dummy := $(warning Warning: Disabled BPF skeletons as clang ($(CLANG)) is missing) BUILD_BPF_SKEL := 0 diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 058c9aecf6087d065a31115492b4e80bed69c7a2..27e7c478880fdecd10761fc07d4249bf1581d9c0 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -134,6 +134,8 @@ include ../scripts/utilities.mak # x86 instruction decoder - new instructions test # # Define GEN_VMLINUX_H to generate 
vmlinux.h from the BTF. +# +# Define NO_SHELLCHECK if you do not want to run shellcheck during build # As per kernel Makefile, avoid funny character set dependencies unexport LC_ALL @@ -227,8 +229,15 @@ else force_fixdep := $(config) endif +# Runs shellcheck on perf test shell scripts +ifeq ($(NO_SHELLCHECK),1) + SHELLCHECK := +else + SHELLCHECK := $(shell which shellcheck 2> /dev/null) +endif + export srctree OUTPUT RM CC CXX LD AR CFLAGS CXXFLAGS V BISON FLEX AWK -export HOSTCC HOSTLD HOSTAR HOSTCFLAGS +export HOSTCC HOSTLD HOSTAR HOSTCFLAGS SHELLCHECK include $(srctree)/tools/build/Makefile.include @@ -1152,7 +1161,7 @@ bpf-skel-clean: clean:: $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBSYMBOL)-clean $(LIBPERF)-clean arm64-sysreg-defs-clean fixdep-clean python-clean bpf-skel-clean tests-coresight-targets-clean $(call QUIET_CLEAN, core-objs) $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive $(OUTPUT)perf-iostat $(LANG_BINDINGS) - $(Q)find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete + $(Q)find $(or $(OUTPUT),.) 
-name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete -o -name '*.shellcheck_log' -delete $(Q)$(RM) $(OUTPUT).config-detected $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(OUTPUT)$(LIBJVMTI).so $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* \ diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index 2cf873d71dff03730e62b31c299ed90c2c0a975e..77e6663c1703b8776c4cc33fbf0b81aac833583f 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -199,7 +199,7 @@ static int cs_etm_validate_config(struct auxtrace_record *itr, { int i, err = -EINVAL; struct perf_cpu_map *event_cpus = evsel->evlist->core.user_requested_cpus; - struct perf_cpu_map *online_cpus = perf_cpu_map__new(NULL); + struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus(); /* Set option of each CPU we have */ for (i = 0; i < cpu__max_cpu().cpu; i++) { @@ -211,7 +211,7 @@ static int cs_etm_validate_config(struct auxtrace_record *itr, * program can run on any CPUs in this case, thus don't skip * validation. */ - if (!perf_cpu_map__empty(event_cpus) && + if (!perf_cpu_map__has_any_cpu_or_is_empty(event_cpus) && !perf_cpu_map__has(event_cpus, cpu)) continue; @@ -435,7 +435,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, * Also the case of per-cpu mmaps, need the contextID in order to be notified * when a context switch happened. 
*/ - if (!perf_cpu_map__empty(cpus)) { + if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) { evsel__set_config_if_unset(cs_etm_pmu, cs_etm_evsel, "timestamp", 1); evsel__set_config_if_unset(cs_etm_pmu, cs_etm_evsel, @@ -461,7 +461,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, evsel->core.attr.sample_period = 1; /* In per-cpu case, always need the time of mmap events etc */ - if (!perf_cpu_map__empty(cpus)) + if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) evsel__set_sample_bit(evsel, TIME); err = cs_etm_validate_config(itr, cs_etm_evsel); @@ -536,10 +536,10 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused, int i; int etmv3 = 0, etmv4 = 0, ete = 0; struct perf_cpu_map *event_cpus = evlist->core.user_requested_cpus; - struct perf_cpu_map *online_cpus = perf_cpu_map__new(NULL); + struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus(); /* cpu map is not empty, we have specific CPUs to work with */ - if (!perf_cpu_map__empty(event_cpus)) { + if (!perf_cpu_map__has_any_cpu_or_is_empty(event_cpus)) { for (i = 0; i < cpu__max_cpu().cpu; i++) { struct perf_cpu cpu = { .cpu = i, }; @@ -802,7 +802,7 @@ static int cs_etm_info_fill(struct auxtrace_record *itr, u64 nr_cpu, type; struct perf_cpu_map *cpu_map; struct perf_cpu_map *event_cpus = session->evlist->core.user_requested_cpus; - struct perf_cpu_map *online_cpus = perf_cpu_map__new(NULL); + struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus(); struct cs_etm_recording *ptr = container_of(itr, struct cs_etm_recording, itr); struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu; @@ -814,7 +814,7 @@ static int cs_etm_info_fill(struct auxtrace_record *itr, return -EINVAL; /* If the cpu_map is empty all online CPUs are involved */ - if (perf_cpu_map__empty(event_cpus)) { + if (perf_cpu_map__has_any_cpu_or_is_empty(event_cpus)) { cpu_map = online_cpus; } else { /* Make sure all specified CPUs are online */ diff --git a/tools/perf/arch/arm64/util/arm-spe.c 
b/tools/perf/arch/arm64/util/arm-spe.c index e3acc739bd0027b214a4aa5296e81bfcac3afba7..51ccbfd3d246d484400c9a83220efaa8833f9f95 100644 --- a/tools/perf/arch/arm64/util/arm-spe.c +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -232,7 +232,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, * In the case of per-cpu mmaps, sample CPU for AUX event; * also enable the timestamp tracing for samples correlation. */ - if (!perf_cpu_map__empty(cpus)) { + if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) { evsel__set_sample_bit(arm_spe_evsel, CPU); evsel__set_config_if_unset(arm_spe_pmu, arm_spe_evsel, "ts_enable", 1); @@ -265,7 +265,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, tracking_evsel->core.attr.sample_period = 1; /* In per-cpu case, always need the time of mmap events etc */ - if (!perf_cpu_map__empty(cpus)) { + if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) { evsel__set_sample_bit(tracking_evsel, TIME); evsel__set_sample_bit(tracking_evsel, CPU); diff --git a/tools/perf/arch/arm64/util/header.c b/tools/perf/arch/arm64/util/header.c index a2eef9ec5491096d0f718f59e3e2ae6996e35ca3..97037499152ef785837b8815aac4da7907dfebf5 100644 --- a/tools/perf/arch/arm64/util/header.c +++ b/tools/perf/arch/arm64/util/header.c @@ -57,7 +57,7 @@ static int _get_cpuid(char *buf, size_t sz, struct perf_cpu_map *cpus) int get_cpuid(char *buf, size_t sz) { - struct perf_cpu_map *cpus = perf_cpu_map__new(NULL); + struct perf_cpu_map *cpus = perf_cpu_map__new_online_cpus(); int ret; if (!cpus) diff --git a/tools/perf/arch/loongarch/annotate/instructions.c b/tools/perf/arch/loongarch/annotate/instructions.c index 98e19c5366acfd628fb7cf979e295f01119114a8..21cc7e4149f721d7d0a28f715df89a991fc3d606 100644 --- a/tools/perf/arch/loongarch/annotate/instructions.c +++ b/tools/perf/arch/loongarch/annotate/instructions.c @@ -61,10 +61,10 @@ static int loongarch_jump__parse(struct arch *arch, struct ins_operands *ops, st const char *c = strchr(ops->raw, 
'#'); u64 start, end; - ops->raw_comment = strchr(ops->raw, arch->objdump.comment_char); - ops->raw_func_start = strchr(ops->raw, '<'); + ops->jump.raw_comment = strchr(ops->raw, arch->objdump.comment_char); + ops->jump.raw_func_start = strchr(ops->raw, '<'); - if (ops->raw_func_start && c > ops->raw_func_start) + if (ops->jump.raw_func_start && c > ops->jump.raw_func_start) c = NULL; if (c++ != NULL) diff --git a/tools/perf/arch/x86/tests/hybrid.c b/tools/perf/arch/x86/tests/hybrid.c index eb152770f148562b8032cbd7a14c1e153e273d93..40f5d17fedab6955c89042837eddb13227b04c58 100644 --- a/tools/perf/arch/x86/tests/hybrid.c +++ b/tools/perf/arch/x86/tests/hybrid.c @@ -47,7 +47,7 @@ static int test__hybrid_hw_group_event(struct evlist *evlist) evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong hybrid type", test_hybrid_type(evsel, PERF_TYPE_RAW)); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_BRANCH_INSTRUCTIONS)); TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); return TEST_OK; } @@ -102,7 +102,7 @@ static int test__hybrid_group_modifier1(struct evlist *evlist) evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong hybrid type", test_hybrid_type(evsel, PERF_TYPE_RAW)); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_BRANCH_INSTRUCTIONS)); TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); @@ -163,6 +163,24 @@ static int test__checkevent_pmu(struct evlist *evlist) return TEST_OK; } +static int test__hybrid_hw_group_event_2(struct evlist *evlist) +{ 
+ struct evsel *evsel, *leader; + + evsel = leader = evlist__first(evlist); + TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); + TEST_ASSERT_VAL("wrong hybrid type", test_hybrid_type(evsel, PERF_TYPE_RAW)); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); + + evsel = evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); + TEST_ASSERT_VAL("wrong config", evsel->core.attr.config == 0x3c); + TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); + return TEST_OK; +} + struct evlist_test { const char *name; bool (*valid)(void); @@ -171,27 +189,27 @@ struct evlist_test { static const struct evlist_test test__hybrid_events[] = { { - .name = "cpu_core/cpu-cycles/", + .name = "cpu_core/cycles/", .check = test__hybrid_hw_event_with_pmu, /* 0 */ }, { - .name = "{cpu_core/cpu-cycles/,cpu_core/instructions/}", + .name = "{cpu_core/cycles/,cpu_core/branches/}", .check = test__hybrid_hw_group_event, /* 1 */ }, { - .name = "{cpu-clock,cpu_core/cpu-cycles/}", + .name = "{cpu-clock,cpu_core/cycles/}", .check = test__hybrid_sw_hw_group_event, /* 2 */ }, { - .name = "{cpu_core/cpu-cycles/,cpu-clock}", + .name = "{cpu_core/cycles/,cpu-clock}", .check = test__hybrid_hw_sw_group_event, /* 3 */ }, { - .name = "{cpu_core/cpu-cycles/k,cpu_core/instructions/u}", + .name = "{cpu_core/cycles/k,cpu_core/branches/u}", .check = test__hybrid_group_modifier1, /* 4 */ }, @@ -215,6 +233,11 @@ static const struct evlist_test test__hybrid_events[] = { .check = test__hybrid_cache_event, /* 8 */ }, + { + .name = "{cpu_core/cycles/,cpu_core/cpu-cycles/}", + .check = test__hybrid_hw_group_event_2, + /* 9 */ + }, }; static int test_event(const struct evlist_test *e) diff --git a/tools/perf/arch/x86/util/dwarf-regs.c b/tools/perf/arch/x86/util/dwarf-regs.c 
index 5309348057108f60ea2f3d95ad03e1ef0eda8149..399c4a0a29d8c1ac4f21e2ee1030e0beec27aa34 100644 --- a/tools/perf/arch/x86/util/dwarf-regs.c +++ b/tools/perf/arch/x86/util/dwarf-regs.c @@ -113,3 +113,41 @@ int regs_query_register_offset(const char *name) return roff->offset; return -EINVAL; } + +struct dwarf_regs_idx { + const char *name; + int idx; +}; + +static const struct dwarf_regs_idx x86_regidx_table[] = { + { "rax", 0 }, { "eax", 0 }, { "ax", 0 }, { "al", 0 }, + { "rdx", 1 }, { "edx", 1 }, { "dx", 1 }, { "dl", 1 }, + { "rcx", 2 }, { "ecx", 2 }, { "cx", 2 }, { "cl", 2 }, + { "rbx", 3 }, { "ebx", 3 }, { "bx", 3 }, { "bl", 3 }, + { "rsi", 4 }, { "esi", 4 }, { "si", 4 }, { "sil", 4 }, + { "rdi", 5 }, { "edi", 5 }, { "di", 5 }, { "dil", 5 }, + { "rbp", 6 }, { "ebp", 6 }, { "bp", 6 }, { "bpl", 6 }, + { "rsp", 7 }, { "esp", 7 }, { "sp", 7 }, { "spl", 7 }, + { "r8", 8 }, { "r8d", 8 }, { "r8w", 8 }, { "r8b", 8 }, + { "r9", 9 }, { "r9d", 9 }, { "r9w", 9 }, { "r9b", 9 }, + { "r10", 10 }, { "r10d", 10 }, { "r10w", 10 }, { "r10b", 10 }, + { "r11", 11 }, { "r11d", 11 }, { "r11w", 11 }, { "r11b", 11 }, + { "r12", 12 }, { "r12d", 12 }, { "r12w", 12 }, { "r12b", 12 }, + { "r13", 13 }, { "r13d", 13 }, { "r13w", 13 }, { "r13b", 13 }, + { "r14", 14 }, { "r14d", 14 }, { "r14w", 14 }, { "r14b", 14 }, + { "r15", 15 }, { "r15d", 15 }, { "r15w", 15 }, { "r15b", 15 }, + { "rip", DWARF_REG_PC }, +}; + +int get_arch_regnum(const char *name) +{ + unsigned int i; + + if (*name != '%') + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(x86_regidx_table); i++) + if (!strcmp(x86_regidx_table[i].name, name + 1)) + return x86_regidx_table[i].idx; + return -ENOENT; +} diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c index 5741ffe473120a2c09dd74759aa955f2aec261d8..e65b7dbe27fbcee6cc4890a30622dc27f62a0e95 100644 --- a/tools/perf/arch/x86/util/event.c +++ b/tools/perf/arch/x86/util/event.c @@ -14,66 +14,79 @@ #if defined(__x86_64__) -int
perf_event__synthesize_extra_kmaps(struct perf_tool *tool, - perf_event__handler_t process, - struct machine *machine) +struct perf_event__synthesize_extra_kmaps_cb_args { + struct perf_tool *tool; + perf_event__handler_t process; + struct machine *machine; + union perf_event *event; +}; + +static int perf_event__synthesize_extra_kmaps_cb(struct map *map, void *data) { - int rc = 0; - struct map_rb_node *pos; - struct maps *kmaps = machine__kernel_maps(machine); - union perf_event *event = zalloc(sizeof(event->mmap) + - machine->id_hdr_size); + struct perf_event__synthesize_extra_kmaps_cb_args *args = data; + union perf_event *event = args->event; + struct kmap *kmap; + size_t size; - if (!event) { - pr_debug("Not enough memory synthesizing mmap event " - "for extra kernel maps\n"); - return -1; - } + if (!__map__is_extra_kernel_map(map)) + return 0; - maps__for_each_entry(kmaps, pos) { - struct kmap *kmap; - size_t size; - struct map *map = pos->map; + kmap = map__kmap(map); - if (!__map__is_extra_kernel_map(map)) - continue; + size = sizeof(event->mmap) - sizeof(event->mmap.filename) + + PERF_ALIGN(strlen(kmap->name) + 1, sizeof(u64)) + + args->machine->id_hdr_size; - kmap = map__kmap(map); + memset(event, 0, size); - size = sizeof(event->mmap) - sizeof(event->mmap.filename) + - PERF_ALIGN(strlen(kmap->name) + 1, sizeof(u64)) + - machine->id_hdr_size; + event->mmap.header.type = PERF_RECORD_MMAP; - memset(event, 0, size); + /* + * kernel uses 0 for user space maps, see kernel/perf_event.c + * __perf_event_mmap + */ + if (machine__is_host(args->machine)) + event->header.misc = PERF_RECORD_MISC_KERNEL; + else + event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; - event->mmap.header.type = PERF_RECORD_MMAP; + event->mmap.header.size = size; - /* - * kernel uses 0 for user space maps, see kernel/perf_event.c - * __perf_event_mmap - */ - if (machine__is_host(machine)) - event->header.misc = PERF_RECORD_MISC_KERNEL; - else - event->header.misc = 
PERF_RECORD_MISC_GUEST_KERNEL; + event->mmap.start = map__start(map); + event->mmap.len = map__size(map); + event->mmap.pgoff = map__pgoff(map); + event->mmap.pid = args->machine->pid; - event->mmap.header.size = size; + strlcpy(event->mmap.filename, kmap->name, PATH_MAX); - event->mmap.start = map__start(map); - event->mmap.len = map__size(map); - event->mmap.pgoff = map__pgoff(map); - event->mmap.pid = machine->pid; + if (perf_tool__process_synth_event(args->tool, event, args->machine, args->process) != 0) + return -1; - strlcpy(event->mmap.filename, kmap->name, PATH_MAX); + return 0; +} - if (perf_tool__process_synth_event(tool, event, machine, - process) != 0) { - rc = -1; - break; - } +int perf_event__synthesize_extra_kmaps(struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine) +{ + int rc; + struct maps *kmaps = machine__kernel_maps(machine); + struct perf_event__synthesize_extra_kmaps_cb_args args = { + .tool = tool, + .process = process, + .machine = machine, + .event = zalloc(sizeof(args.event->mmap) + machine->id_hdr_size), + }; + + if (!args.event) { + pr_debug("Not enough memory synthesizing mmap event " + "for extra kernel maps\n"); + return -1; } - free(event); + rc = maps__for_each_map(kmaps, perf_event__synthesize_extra_kmaps_cb, &args); + + free(args.event); return rc; } diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c index d2c8cac1147021dbf51185db50409d1c078bf9e9..af8ae4647585b460c5f3ef381cfea29f3fa966bd 100644 --- a/tools/perf/arch/x86/util/intel-bts.c +++ b/tools/perf/arch/x86/util/intel-bts.c @@ -143,7 +143,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr, if (!opts->full_auxtrace) return 0; - if (opts->full_auxtrace && !perf_cpu_map__empty(cpus)) { + if (opts->full_auxtrace && !perf_cpu_map__has_any_cpu_or_is_empty(cpus)) { pr_err(INTEL_BTS_PMU_NAME " does not support per-cpu recording\n"); return -EINVAL; } @@ -224,7 +224,7 @@ static int 
intel_bts_recording_options(struct auxtrace_record *itr, * In the case of per-cpu mmaps, we need the CPU on the * AUX event. */ - if (!perf_cpu_map__empty(cpus)) + if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) evsel__set_sample_bit(intel_bts_evsel, CPU); } diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index fa0c718b9e7277f0374356bf5d46b603f19ed7ca..d199619df3abe1b22c70fbfa1eea485eb095ae6f 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -369,7 +369,7 @@ static int intel_pt_info_fill(struct auxtrace_record *itr, ui__warning("Intel Processor Trace: TSC not available\n"); } - per_cpu_mmaps = !perf_cpu_map__empty(session->evlist->core.user_requested_cpus); + per_cpu_mmaps = !perf_cpu_map__has_any_cpu_or_is_empty(session->evlist->core.user_requested_cpus); auxtrace_info->type = PERF_AUXTRACE_INTEL_PT; auxtrace_info->priv[INTEL_PT_PMU_TYPE] = intel_pt_pmu->type; @@ -774,7 +774,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, * Per-cpu recording needs sched_switch events to distinguish different * threads. */ - if (have_timing_info && !perf_cpu_map__empty(cpus) && + if (have_timing_info && !perf_cpu_map__has_any_cpu_or_is_empty(cpus) && !record_opts__no_switch_events(opts)) { if (perf_can_record_switch_events()) { bool cpu_wide = !target__none(&opts->target) && @@ -832,7 +832,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, * In the case of per-cpu mmaps, we need the CPU on the * AUX event. 
*/ - if (!perf_cpu_map__empty(cpus)) + if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) evsel__set_sample_bit(intel_pt_evsel, CPU); } @@ -858,7 +858,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, tracking_evsel->immediate = true; /* In per-cpu case, always need the time of mmap events etc */ - if (!perf_cpu_map__empty(cpus)) { + if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) { evsel__set_sample_bit(tracking_evsel, TIME); /* And the CPU for switch events */ evsel__set_sample_bit(tracking_evsel, CPU); @@ -870,7 +870,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, * Warn the user when we do not have enough information to decode i.e. * per-cpu with no sched_switch (except workload-only). */ - if (!ptr->have_sched_switch && !perf_cpu_map__empty(cpus) && + if (!ptr->have_sched_switch && !perf_cpu_map__has_any_cpu_or_is_empty(cpus) && !target__none(&opts->target) && !intel_pt_evsel->core.attr.exclude_user) ui__warning("Intel Processor Trace decoding will not be possible except for kernel tracing!\n"); diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c index 6bfffe83dde99bd240de98cdde473d1bb8620d3b..d3db73dac66afe48ff80ff30849e125240c812e7 100644 --- a/tools/perf/bench/epoll-ctl.c +++ b/tools/perf/bench/epoll-ctl.c @@ -330,7 +330,7 @@ int bench_epoll_ctl(int argc, const char **argv) act.sa_sigaction = toggle_done; sigaction(SIGINT, &act, NULL); - cpu = perf_cpu_map__new(NULL); + cpu = perf_cpu_map__new_online_cpus(); if (!cpu) goto errmem; diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c index cb5174b53940b265ae2be3adec3a9b916d524dab..06bb3187660abdd736821b5e96dddc73af261fed 100644 --- a/tools/perf/bench/epoll-wait.c +++ b/tools/perf/bench/epoll-wait.c @@ -444,7 +444,7 @@ int bench_epoll_wait(int argc, const char **argv) act.sa_sigaction = toggle_done; sigaction(SIGINT, &act, NULL); - cpu = perf_cpu_map__new(NULL); + cpu = perf_cpu_map__new_online_cpus(); if (!cpu) goto 
errmem; diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index 2005a3fa3026799d1cfcd246cc956ac4e528c97b..0c69d20efa329427c71141c09e6f1c1a9031738c 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -138,7 +138,7 @@ int bench_futex_hash(int argc, const char **argv) exit(EXIT_FAILURE); } - cpu = perf_cpu_map__new(NULL); + cpu = perf_cpu_map__new_online_cpus(); if (!cpu) goto errmem; diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index 092cbd52db82b500360022d0d768999d1ca42cb9..7a4973346180fc91009573c503a021c9b217ee29 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -172,7 +172,7 @@ int bench_futex_lock_pi(int argc, const char **argv) if (argc) goto err; - cpu = perf_cpu_map__new(NULL); + cpu = perf_cpu_map__new_online_cpus(); if (!cpu) err(EXIT_FAILURE, "calloc"); diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c index c0035990a33cebafea34493b7978a9e1c2c9b2f7..d9ad736c1a3e0d13317aaf28a777a6327c17e5d3 100644 --- a/tools/perf/bench/futex-requeue.c +++ b/tools/perf/bench/futex-requeue.c @@ -174,7 +174,7 @@ int bench_futex_requeue(int argc, const char **argv) if (argc) goto err; - cpu = perf_cpu_map__new(NULL); + cpu = perf_cpu_map__new_online_cpus(); if (!cpu) err(EXIT_FAILURE, "cpu_map__new"); diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c index 5ab0234d74e696d1afcc48451110653bc4da3649..b66df553e5614cb393066f2ec8b66ee75ba15ed8 100644 --- a/tools/perf/bench/futex-wake-parallel.c +++ b/tools/perf/bench/futex-wake-parallel.c @@ -264,7 +264,7 @@ int bench_futex_wake_parallel(int argc, const char **argv) err(EXIT_FAILURE, "mlockall"); } - cpu = perf_cpu_map__new(NULL); + cpu = perf_cpu_map__new_online_cpus(); if (!cpu) err(EXIT_FAILURE, "calloc"); diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c index 
18a5894af8bb51fb6aa5353db05ef9c75a270253..690fd6d3da130161ac6473df2ba5dabecbc8324a 100644 --- a/tools/perf/bench/futex-wake.c +++ b/tools/perf/bench/futex-wake.c @@ -149,7 +149,7 @@ int bench_futex_wake(int argc, const char **argv) exit(EXIT_FAILURE); } - cpu = perf_cpu_map__new(NULL); + cpu = perf_cpu_map__new_online_cpus(); if (!cpu) err(EXIT_FAILURE, "calloc"); diff --git a/tools/perf/bench/sched-seccomp-notify.c b/tools/perf/bench/sched-seccomp-notify.c index a01c40131493b76dc75c354b2e60b5321944d198..269c1f4a6852ce49584674322943f1b71e9a77cd 100644 --- a/tools/perf/bench/sched-seccomp-notify.c +++ b/tools/perf/bench/sched-seccomp-notify.c @@ -32,7 +32,7 @@ static bool sync_mode; static const struct option options[] = { OPT_U64('l', "loop", &loops, "Specify number of loops"), OPT_BOOLEAN('s', "sync-mode", &sync_mode, - "Enable the synchronious mode for seccomp notifications"), + "Enable the synchronous mode for seccomp notifications"), OPT_END() }; diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index aeeb801f1ed7b15f1118de904d4366feb7ed3a97..6c1cc797692d949f6fc07bb195007ecfad536dd6 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -20,6 +20,7 @@ #include "util/evlist.h" #include "util/evsel.h" #include "util/annotate.h" +#include "util/annotate-data.h" #include "util/event.h" #include #include "util/parse-events.h" @@ -45,7 +46,6 @@ struct perf_annotate { struct perf_tool tool; struct perf_session *session; - struct annotation_options opts; #ifdef HAVE_SLANG_SUPPORT bool use_tui; #endif @@ -56,9 +56,13 @@ struct perf_annotate { bool skip_missing; bool has_br_stack; bool group_set; + bool data_type; + bool type_stat; + bool insn_stat; float min_percent; const char *sym_hist_filter; const char *cpu_list; + const char *target_data_type; DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); }; @@ -94,6 +98,7 @@ static void process_basic_block(struct addr_map_symbol *start, struct annotation *notes = sym ? 
symbol__annotation(sym) : NULL; struct block_range_iter iter; struct block_range *entry; + struct annotated_branch *branch; /* * Sanity; NULL isn't executable and the CPU cannot execute backwards @@ -105,6 +110,8 @@ static void process_basic_block(struct addr_map_symbol *start, if (!block_range_iter__valid(&iter)) return; + branch = annotation__get_branch(notes); + /* * First block in range is a branch target. */ @@ -118,8 +125,8 @@ static void process_basic_block(struct addr_map_symbol *start, entry->coverage++; entry->sym = sym; - if (notes) - notes->max_coverage = max(notes->max_coverage, entry->coverage); + if (branch) + branch->max_coverage = max(branch->max_coverage, entry->coverage); } while (block_range_iter__next(&iter)); @@ -315,9 +322,153 @@ static int hist_entry__tty_annotate(struct hist_entry *he, struct perf_annotate *ann) { if (!ann->use_stdio2) - return symbol__tty_annotate(&he->ms, evsel, &ann->opts); + return symbol__tty_annotate(&he->ms, evsel); + + return symbol__tty_annotate2(&he->ms, evsel); +} + +static void print_annotated_data_header(struct hist_entry *he, struct evsel *evsel) +{ + struct dso *dso = map__dso(he->ms.map); + int nr_members = 1; + int nr_samples = he->stat.nr_events; + + if (evsel__is_group_event(evsel)) { + struct hist_entry *pair; + + list_for_each_entry(pair, &he->pairs.head, pairs.node) + nr_samples += pair->stat.nr_events; + } + + printf("Annotate type: '%s' in %s (%d samples):\n", + he->mem_type->self.type_name, dso->name, nr_samples); + + if (evsel__is_group_event(evsel)) { + struct evsel *pos; + int i = 0; + + for_each_group_evsel(pos, evsel) + printf(" event[%d] = %s\n", i++, pos->name); + + nr_members = evsel->core.nr_members; + } + + printf("============================================================================\n"); + printf("%*s %10s %10s %s\n", 11 * nr_members, "samples", "offset", "size", "field"); +} + +static void print_annotated_data_type(struct annotated_data_type *mem_type, + struct annotated_member 
*member, + struct evsel *evsel, int indent) +{ + struct annotated_member *child; + struct type_hist *h = mem_type->histograms[evsel->core.idx]; + int i, nr_events = 1, samples = 0; + + for (i = 0; i < member->size; i++) + samples += h->addr[member->offset + i].nr_samples; + printf(" %10d", samples); - return symbol__tty_annotate2(&he->ms, evsel, &ann->opts); + if (evsel__is_group_event(evsel)) { + struct evsel *pos; + + for_each_group_member(pos, evsel) { + h = mem_type->histograms[pos->core.idx]; + + samples = 0; + for (i = 0; i < member->size; i++) + samples += h->addr[member->offset + i].nr_samples; + printf(" %10d", samples); + } + nr_events = evsel->core.nr_members; + } + + printf(" %10d %10d %*s%s\t%s", + member->offset, member->size, indent, "", member->type_name, + member->var_name ?: ""); + + if (!list_empty(&member->children)) + printf(" {\n"); + + list_for_each_entry(child, &member->children, node) + print_annotated_data_type(mem_type, child, evsel, indent + 4); + + if (!list_empty(&member->children)) + printf("%*s}", 11 * nr_events + 24 + indent, ""); + printf(";\n"); +} + +static void print_annotate_data_stat(struct annotated_data_stat *s) +{ +#define PRINT_STAT(fld) if (s->fld) printf("%10d : %s\n", s->fld, #fld) + + int bad = s->no_sym + + s->no_insn + + s->no_insn_ops + + s->no_mem_ops + + s->no_reg + + s->no_dbginfo + + s->no_cuinfo + + s->no_var + + s->no_typeinfo + + s->invalid_size + + s->bad_offset; + int ok = s->total - bad; + + printf("Annotate data type stats:\n"); + printf("total %d, ok %d (%.1f%%), bad %d (%.1f%%)\n", + s->total, ok, 100.0 * ok / (s->total ?: 1), bad, 100.0 * bad / (s->total ?: 1)); + printf("-----------------------------------------------------------\n"); + PRINT_STAT(no_sym); + PRINT_STAT(no_insn); + PRINT_STAT(no_insn_ops); + PRINT_STAT(no_mem_ops); + PRINT_STAT(no_reg); + PRINT_STAT(no_dbginfo); + PRINT_STAT(no_cuinfo); + PRINT_STAT(no_var); + PRINT_STAT(no_typeinfo); + PRINT_STAT(invalid_size); + 
PRINT_STAT(bad_offset); + printf("\n"); + +#undef PRINT_STAT +} + +static void print_annotate_item_stat(struct list_head *head, const char *title) +{ + struct annotated_item_stat *istat, *pos, *iter; + int total_good, total_bad, total; + int sum1, sum2; + LIST_HEAD(tmp); + + /* sort the list by count */ + list_splice_init(head, &tmp); + total_good = total_bad = 0; + + list_for_each_entry_safe(istat, pos, &tmp, list) { + total_good += istat->good; + total_bad += istat->bad; + sum1 = istat->good + istat->bad; + + list_for_each_entry(iter, head, list) { + sum2 = iter->good + iter->bad; + if (sum1 > sum2) + break; + } + list_move_tail(&istat->list, &iter->list); + } + total = total_good + total_bad; + + printf("Annotate %s stats\n", title); + printf("total %d, ok %d (%.1f%%), bad %d (%.1f%%)\n\n", total, + total_good, 100.0 * total_good / (total ?: 1), + total_bad, 100.0 * total_bad / (total ?: 1)); + printf(" %-10s: %5s %5s\n", "Name", "Good", "Bad"); + printf("-----------------------------------------------------------\n"); + list_for_each_entry(istat, head, list) + printf(" %-10s: %5d %5d\n", istat->name, istat->good, istat->bad); + printf("\n"); } static void hists__find_annotations(struct hists *hists, @@ -327,6 +478,11 @@ static void hists__find_annotations(struct hists *hists, struct rb_node *nd = rb_first_cached(&hists->entries), *next; int key = K_RIGHT; + if (ann->type_stat) + print_annotate_data_stat(&ann_data_stat); + if (ann->insn_stat) + print_annotate_item_stat(&ann_insn_stat, "Instruction"); + while (nd) { struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node); struct annotation *notes; @@ -359,11 +515,38 @@ find_next: continue; } + if (ann->data_type) { + /* skip unknown type */ + if (he->mem_type->histograms == NULL) + goto find_next; + + if (ann->target_data_type) { + const char *type_name = he->mem_type->self.type_name; + + /* skip 'struct ' prefix in the type name */ + if (strncmp(ann->target_data_type, "struct ", 7) && + 
!strncmp(type_name, "struct ", 7)) + type_name += 7; + + /* skip 'union ' prefix in the type name */ + if (strncmp(ann->target_data_type, "union ", 6) && + !strncmp(type_name, "union ", 6)) + type_name += 6; + + if (strcmp(ann->target_data_type, type_name)) + goto find_next; + } + + print_annotated_data_header(he, evsel); + print_annotated_data_type(he->mem_type, &he->mem_type->self, evsel, 0); + printf("\n"); + goto find_next; + } + if (use_browser == 2) { int ret; int (*annotate)(struct hist_entry *he, struct evsel *evsel, - struct annotation_options *options, struct hist_browser_timer *hbt); annotate = dlsym(perf_gtk_handle, @@ -373,14 +556,14 @@ find_next: return; } - ret = annotate(he, evsel, &ann->opts, NULL); + ret = annotate(he, evsel, NULL); if (!ret || !ann->skip_missing) return; /* skip missing symbols */ nd = rb_next(nd); } else if (use_browser == 1) { - key = hist_entry__tui_annotate(he, evsel, NULL, &ann->opts); + key = hist_entry__tui_annotate(he, evsel, NULL); switch (key) { case -1: @@ -422,9 +605,9 @@ static int __cmd_annotate(struct perf_annotate *ann) goto out; } - if (!ann->opts.objdump_path) { + if (!annotate_opts.objdump_path) { ret = perf_env__lookup_objdump(&session->header.env, - &ann->opts.objdump_path); + &annotate_opts.objdump_path); if (ret) goto out; } @@ -457,8 +640,20 @@ static int __cmd_annotate(struct perf_annotate *ann) evsel__reset_sample_bit(pos, CALLCHAIN); evsel__output_resort(pos, NULL); - if (symbol_conf.event_group && !evsel__is_group_leader(pos)) + /* + * An event group needs to display other events too. + * Let's delay printing until other events are processed. 
+ */ + if (symbol_conf.event_group) { + if (!evsel__is_group_leader(pos)) { + struct hists *leader_hists; + + leader_hists = evsel__hists(evsel__leader(pos)); + hists__match(leader_hists, hists); + hists__link(leader_hists, hists); + } continue; + } hists__find_annotations(hists, pos, ann); } @@ -469,6 +664,20 @@ static int __cmd_annotate(struct perf_annotate *ann) goto out; } + /* Display group events together */ + evlist__for_each_entry(session->evlist, pos) { + struct hists *hists = evsel__hists(pos); + u32 nr_samples = hists->stats.nr_samples; + + if (nr_samples == 0) + continue; + + if (!symbol_conf.event_group || !evsel__is_group_leader(pos)) + continue; + + hists__find_annotations(hists, pos, ann); + } + if (use_browser == 2) { void (*show_annotations)(void); @@ -495,6 +704,17 @@ static int parse_percent_limit(const struct option *opt, const char *str, return 0; } +static int parse_data_type(const struct option *opt, const char *str, int unset) +{ + struct perf_annotate *ann = opt->value; + + ann->data_type = !unset; + if (str) + ann->target_data_type = strdup(str); + + return 0; +} + static const char * const annotate_usage[] = { "perf annotate []", NULL @@ -558,9 +778,9 @@ int cmd_annotate(int argc, const char **argv) "file", "vmlinux pathname"), OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, "load module symbols - WARNING: use only with -k and LIVE kernel"), - OPT_BOOLEAN('l', "print-line", &annotate.opts.print_lines, + OPT_BOOLEAN('l', "print-line", &annotate_opts.print_lines, "print matching source lines (may be slow)"), - OPT_BOOLEAN('P', "full-paths", &annotate.opts.full_path, + OPT_BOOLEAN('P', "full-paths", &annotate_opts.full_path, "Don't shorten the displayed pathnames"), OPT_BOOLEAN(0, "skip-missing", &annotate.skip_missing, "Skip symbols that cannot be annotated"), @@ -571,15 +791,15 @@ int cmd_annotate(int argc, const char **argv) OPT_CALLBACK(0, "symfs", NULL, "directory", "Look for files with symbols relative to this directory", 
symbol__config_symfs), - OPT_BOOLEAN(0, "source", &annotate.opts.annotate_src, + OPT_BOOLEAN(0, "source", &annotate_opts.annotate_src, "Interleave source code with assembly code (default)"), - OPT_BOOLEAN(0, "asm-raw", &annotate.opts.show_asm_raw, + OPT_BOOLEAN(0, "asm-raw", &annotate_opts.show_asm_raw, "Display raw encoding of assembly instructions (default)"), OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", "Specify disassembler style (e.g. -M intel for intel syntax)"), - OPT_STRING(0, "prefix", &annotate.opts.prefix, "prefix", + OPT_STRING(0, "prefix", &annotate_opts.prefix, "prefix", "Add prefix to source file path names in programs (with --prefix-strip)"), - OPT_STRING(0, "prefix-strip", &annotate.opts.prefix_strip, "N", + OPT_STRING(0, "prefix-strip", &annotate_opts.prefix_strip, "N", "Strip first N entries of source file path name in programs (with --prefix)"), OPT_STRING(0, "objdump", &objdump_path, "path", "objdump binary to use for disassembly and annotations"), @@ -598,7 +818,7 @@ int cmd_annotate(int argc, const char **argv) OPT_CALLBACK_DEFAULT(0, "stdio-color", NULL, "mode", "'always' (default), 'never' or 'auto' only applicable to --stdio mode", stdio__config_color, "always"), - OPT_CALLBACK(0, "percent-type", &annotate.opts, "local-period", + OPT_CALLBACK(0, "percent-type", &annotate_opts, "local-period", "Set percent type local/global-period/hits", annotate_parse_percent_type), OPT_CALLBACK(0, "percent-limit", &annotate, "percent", @@ -606,7 +826,13 @@ int cmd_annotate(int argc, const char **argv) OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts", "Instruction Tracing options\n" ITRACE_HELP, itrace_parse_synth_opts), - + OPT_CALLBACK_OPTARG(0, "data-type", &annotate, NULL, "name", + "Show data type annotate for the memory accesses", + parse_data_type), + OPT_BOOLEAN(0, "type-stat", &annotate.type_stat, + "Show stats for the data type annotation"), + OPT_BOOLEAN(0, "insn-stat", &annotate.insn_stat, + 
"Show instruction stats for the data type annotation"), OPT_END() }; int ret; @@ -614,13 +840,13 @@ int cmd_annotate(int argc, const char **argv) set_option_flag(options, 0, "show-total-period", PARSE_OPT_EXCLUSIVE); set_option_flag(options, 0, "show-nr-samples", PARSE_OPT_EXCLUSIVE); - annotation_options__init(&annotate.opts); + annotation_options__init(); ret = hists__init(); if (ret < 0) return ret; - annotation_config__init(&annotate.opts); + annotation_config__init(); argc = parse_options(argc, argv, options, annotate_usage, 0); if (argc) { @@ -635,13 +861,13 @@ int cmd_annotate(int argc, const char **argv) } if (disassembler_style) { - annotate.opts.disassembler_style = strdup(disassembler_style); - if (!annotate.opts.disassembler_style) + annotate_opts.disassembler_style = strdup(disassembler_style); + if (!annotate_opts.disassembler_style) return -ENOMEM; } if (objdump_path) { - annotate.opts.objdump_path = strdup(objdump_path); - if (!annotate.opts.objdump_path) + annotate_opts.objdump_path = strdup(objdump_path); + if (!annotate_opts.objdump_path) return -ENOMEM; } if (addr2line_path) { @@ -650,7 +876,7 @@ int cmd_annotate(int argc, const char **argv) return -ENOMEM; } - if (annotate_check_args(&annotate.opts) < 0) + if (annotate_check_args() < 0) return -EINVAL; #ifdef HAVE_GTK2_SUPPORT @@ -660,6 +886,13 @@ int cmd_annotate(int argc, const char **argv) } #endif +#ifndef HAVE_DWARF_GETLOCATIONS_SUPPORT + if (annotate.data_type) { + pr_err("Error: Data type profiling is disabled due to missing DWARF support\n"); + return -ENOTSUP; + } +#endif + ret = symbol__validate_sym_arguments(); if (ret) return ret; @@ -702,6 +935,14 @@ int cmd_annotate(int argc, const char **argv) use_browser = 2; #endif + /* FIXME: only support stdio for now */ + if (annotate.data_type) { + use_browser = 0; + annotate_opts.annotate_src = false; + symbol_conf.annotate_data_member = true; + symbol_conf.annotate_data_sample = true; + } + setup_browser(true); /* @@ -709,7 +950,10 @@ int 
cmd_annotate(int argc, const char **argv) * symbol, we do not care about the processes in annotate, * set sort order to avoid repeated output. */ - sort_order = "dso,symbol"; + if (annotate.data_type) + sort_order = "dso,type"; + else + sort_order = "dso,symbol"; /* * Set SORT_MODE__BRANCH so that annotate display IPC/Cycle @@ -731,7 +975,7 @@ out_delete: #ifndef NDEBUG perf_session__delete(annotate.session); #endif - annotation_options__exit(&annotate.opts); + annotation_options__exit(); return ret; } diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index a4cf9de7a7b5a9d6ae416f1a65f6a5c47c6e2e4f..f78eea9e21539352e96c68f37c4b0001c84054e4 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2320,7 +2320,7 @@ static int setup_nodes(struct perf_session *session) nodes[node] = set; /* empty node, skip */ - if (perf_cpu_map__empty(map)) + if (perf_cpu_map__has_any_cpu_or_is_empty(map)) continue; perf_cpu_map__for_each_cpu(cpu, idx, map) { diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index ac2e6c75f9120192ad5220eaf727896ef8b11aee..eb30c8eca48878482d9e9682165330a161a6f3f8 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -333,7 +333,7 @@ static int set_tracing_func_irqinfo(struct perf_ftrace *ftrace) static int reset_tracing_cpu(void) { - struct perf_cpu_map *cpumap = perf_cpu_map__new(NULL); + struct perf_cpu_map *cpumap = perf_cpu_map__new_online_cpus(); int ret; ret = set_tracing_cpumask(cpumap); diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index c8cf2fdd9cff9637ebd97660321e858fa3aeb3e0..eb3ef5c24b66258c568dea02252975f1addb3924 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -2265,6 +2265,12 @@ int cmd_inject(int argc, const char **argv) "perf inject []", NULL }; + + if (!inject.itrace_synth_opts.set) { + /* Disable eager loading of kernel symbols that adds overhead to perf inject. 
*/ + symbol_conf.lazy_load_kernel_maps = true; + } + #ifndef HAVE_JITDUMP set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true); #endif diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index a3ff2f4edbaa5064b040b256eaee1361a34122e5..230461280e4525a612a6842c3a99c7b1a1225929 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -2285,8 +2285,10 @@ setup_args: else ev_name = strdup(contention_tracepoints[j].name); - if (!ev_name) + if (!ev_name) { + free(rec_argv); return -ENOMEM; + } rec_argv[i++] = "-e"; rec_argv[i++] = ev_name; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index dcf288a4fb9a9ad9281b2892272b4918c8f760e5..91e6828c38cc2ef4c6b4d28d842309ce4e475f8d 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -270,7 +270,7 @@ static int record__write(struct record *rec, struct mmap *map __maybe_unused, static int record__aio_enabled(struct record *rec); static int record__comp_enabled(struct record *rec); -static size_t zstd_compress(struct perf_session *session, struct mmap *map, +static ssize_t zstd_compress(struct perf_session *session, struct mmap *map, void *dst, size_t dst_size, void *src, size_t src_size); #ifdef HAVE_AIO_SUPPORT @@ -405,9 +405,13 @@ static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size */ if (record__comp_enabled(aio->rec)) { - size = zstd_compress(aio->rec->session, NULL, aio->data + aio->size, - mmap__mmap_len(map) - aio->size, - buf, size); + ssize_t compressed = zstd_compress(aio->rec->session, NULL, aio->data + aio->size, + mmap__mmap_len(map) - aio->size, + buf, size); + if (compressed < 0) + return (int)compressed; + + size = compressed; } else { memcpy(aio->data + aio->size, buf, size); } @@ -633,7 +637,13 @@ static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size) struct record *rec = to; if (record__comp_enabled(rec)) { - size = zstd_compress(rec->session, map, map->data, 
mmap__mmap_len(map), bf, size); + ssize_t compressed = zstd_compress(rec->session, map, map->data, + mmap__mmap_len(map), bf, size); + + if (compressed < 0) + return (int)compressed; + + size = compressed; bf = map->data; } @@ -1350,7 +1360,7 @@ static int record__open(struct record *rec) evlist__for_each_entry(evlist, pos) { try_again: if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) { - if (evsel__fallback(pos, errno, msg, sizeof(msg))) { + if (evsel__fallback(pos, &opts->target, errno, msg, sizeof(msg))) { if (verbose > 0) ui__warning("%s\n", msg); goto try_again; @@ -1527,10 +1537,10 @@ static size_t process_comp_header(void *record, size_t increment) return size; } -static size_t zstd_compress(struct perf_session *session, struct mmap *map, +static ssize_t zstd_compress(struct perf_session *session, struct mmap *map, void *dst, size_t dst_size, void *src, size_t src_size) { - size_t compressed; + ssize_t compressed; size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1; struct zstd_data *zstd_data = &session->zstd_data; @@ -1539,6 +1549,8 @@ static size_t zstd_compress(struct perf_session *session, struct mmap *map, compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size, max_record_size, process_comp_header); + if (compressed < 0) + return compressed; if (map && map->file) { thread->bytes_transferred += src_size; @@ -1912,21 +1924,13 @@ static void __record__save_lost_samples(struct record *rec, struct evsel *evsel, static void record__read_lost_samples(struct record *rec) { struct perf_session *session = rec->session; - struct perf_record_lost_samples *lost; + struct perf_record_lost_samples *lost = NULL; struct evsel *evsel; /* there was an error during record__open */ if (session->evlist == NULL) return; - lost = zalloc(PERF_SAMPLE_MAX_SIZE); - if (lost == NULL) { - pr_debug("Memory allocation failed\n"); - return; - } - - lost->header.type = PERF_RECORD_LOST_SAMPLES; - 
evlist__for_each_entry(session->evlist, evsel) { struct xyarray *xy = evsel->core.sample_id; u64 lost_count; @@ -1949,6 +1953,15 @@ static void record__read_lost_samples(struct record *rec) } if (count.lost) { + if (!lost) { + lost = zalloc(sizeof(*lost) + + session->machines.host.id_hdr_size); + if (!lost) { + pr_debug("Memory allocation failed\n"); + return; + } + lost->header.type = PERF_RECORD_LOST_SAMPLES; + } __record__save_lost_samples(rec, evsel, lost, x, y, count.lost, 0); } @@ -1956,9 +1969,19 @@ static void record__read_lost_samples(struct record *rec) } lost_count = perf_bpf_filter__lost_count(evsel); - if (lost_count) + if (lost_count) { + if (!lost) { + lost = zalloc(sizeof(*lost) + + session->machines.host.id_hdr_size); + if (!lost) { + pr_debug("Memory allocation failed\n"); + return; + } + lost->header.type = PERF_RECORD_LOST_SAMPLES; + } __record__save_lost_samples(rec, evsel, lost, 0, 0, lost_count, PERF_RECORD_MISC_LOST_SAMPLES_BPF); + } } out: free(lost); @@ -2216,32 +2239,6 @@ static void hit_auxtrace_snapshot_trigger(struct record *rec) } } -static void record__uniquify_name(struct record *rec) -{ - struct evsel *pos; - struct evlist *evlist = rec->evlist; - char *new_name; - int ret; - - if (perf_pmus__num_core_pmus() == 1) - return; - - evlist__for_each_entry(evlist, pos) { - if (!evsel__is_hybrid(pos)) - continue; - - if (strchr(pos->name, '/')) - continue; - - ret = asprintf(&new_name, "%s/%s/", - pos->pmu_name, pos->name); - if (ret) { - free(pos->name); - pos->name = new_name; - } - } -} - static int record__terminate_thread(struct record_thread *thread_data) { int err; @@ -2475,7 +2472,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) if (data->is_pipe && rec->evlist->core.nr_entries == 1) rec->opts.sample_id = true; - record__uniquify_name(rec); + evlist__uniquify_name(rec->evlist); /* Debug message used by test scripts */ pr_debug3("perf record opening and mmapping events\n"); @@ -3580,9 +3577,7 @@ static 
int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cp if (cpu_map__is_dummy(cpus)) return 0; - perf_cpu_map__for_each_cpu(cpu, idx, cpus) { - if (cpu.cpu == -1) - continue; + perf_cpu_map__for_each_cpu_skip_any(cpu, idx, cpus) { /* Return ENODEV is input cpu is greater than max cpu */ if ((unsigned long)cpu.cpu > mask->nbits) return -ENODEV; @@ -3989,6 +3984,8 @@ int cmd_record(int argc, const char **argv) # undef set_nobuild #endif + /* Disable eager loading of kernel symbols that adds overhead to perf record. */ + symbol_conf.lazy_load_kernel_maps = true; rec->opts.affinity = PERF_AFFINITY_SYS; rec->evlist = evlist__new(); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 9cb1da2dc0c03bbe7d0b0b63c64cc7c6133e0823..f2ed2b7e80a32649095f123b63b3ed8ecaf42cc9 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -96,9 +96,9 @@ struct report { bool stitch_lbr; bool disable_order; bool skip_empty; + bool data_type; int max_stack; struct perf_read_values show_threads_values; - struct annotation_options annotation_opts; const char *pretty_printing_style; const char *cpu_list; const char *symbol_filter_str; @@ -171,7 +171,7 @@ static int hist_iter__report_callback(struct hist_entry_iter *iter, struct mem_info *mi; struct branch_info *bi; - if (!ui__has_annotation() && !rep->symbol_ipc) + if (!ui__has_annotation() && !rep->symbol_ipc && !rep->data_type) return 0; if (sort__mode == SORT_MODE__BRANCH) { @@ -541,8 +541,7 @@ static int evlist__tui_block_hists_browse(struct evlist *evlist, struct report * evlist__for_each_entry(evlist, pos) { ret = report__browse_block_hists(&rep->block_reports[i++].hist, rep->min_percent, pos, - &rep->session->header.env, - &rep->annotation_opts); + &rep->session->header.env); if (ret != 0) return ret; } @@ -574,8 +573,7 @@ static int evlist__tty_browse_hists(struct evlist *evlist, struct report *rep, c if (rep->total_cycles_mode) { 
report__browse_block_hists(&rep->block_reports[i++].hist, - rep->min_percent, pos, - NULL, NULL); + rep->min_percent, pos, NULL); continue; } @@ -670,7 +668,7 @@ static int report__browse_hists(struct report *rep) } ret = evlist__tui_browse_hists(evlist, help, NULL, rep->min_percent, - &session->header.env, true, &rep->annotation_opts); + &session->header.env, true); /* * Usually "ret" is the last pressed key, and we only * care if the key notifies us to switch data file. @@ -745,7 +743,7 @@ static int hists__resort_cb(struct hist_entry *he, void *arg) if (rep->symbol_ipc && sym && !sym->annotate2) { struct evsel *evsel = hists_to_evsel(he->hists); - symbol__annotate2(&he->ms, evsel, &rep->annotation_opts, NULL); + symbol__annotate2(&he->ms, evsel, NULL); } return 0; @@ -859,27 +857,47 @@ static struct task *tasks_list(struct task *task, struct machine *machine) return tasks_list(parent_task, machine); } -static size_t maps__fprintf_task(struct maps *maps, int indent, FILE *fp) +struct maps__fprintf_task_args { + int indent; + FILE *fp; + size_t printed; +}; + +static int maps__fprintf_task_cb(struct map *map, void *data) { - size_t printed = 0; - struct map_rb_node *rb_node; + struct maps__fprintf_task_args *args = data; + const struct dso *dso = map__dso(map); + u32 prot = map__prot(map); + int ret; - maps__for_each_entry(maps, rb_node) { - struct map *map = rb_node->map; - const struct dso *dso = map__dso(map); - u32 prot = map__prot(map); + ret = fprintf(args->fp, + "%*s %" PRIx64 "-%" PRIx64 " %c%c%c%c %08" PRIx64 " %" PRIu64 " %s\n", + args->indent, "", map__start(map), map__end(map), + prot & PROT_READ ? 'r' : '-', + prot & PROT_WRITE ? 'w' : '-', + prot & PROT_EXEC ? 'x' : '-', + map__flags(map) ? 's' : 'p', + map__pgoff(map), + dso->id.ino, dso->name); - printed += fprintf(fp, "%*s %" PRIx64 "-%" PRIx64 " %c%c%c%c %08" PRIx64 " %" PRIu64 " %s\n", - indent, "", map__start(map), map__end(map), - prot & PROT_READ ? 'r' : '-', - prot & PROT_WRITE ? 
'w' : '-', - prot & PROT_EXEC ? 'x' : '-', - map__flags(map) ? 's' : 'p', - map__pgoff(map), - dso->id.ino, dso->name); - } + if (ret < 0) + return ret; + + args->printed += ret; + return 0; +} + +static size_t maps__fprintf_task(struct maps *maps, int indent, FILE *fp) +{ + struct maps__fprintf_task_args args = { + .indent = indent, + .fp = fp, + .printed = 0, + }; - return printed; + maps__for_each_map(maps, maps__fprintf_task_cb, &args); + + return args.printed; } static void task__print_level(struct task *task, FILE *fp, int level) @@ -1341,15 +1359,15 @@ int cmd_report(int argc, const char **argv) "list of cpus to profile"), OPT_BOOLEAN('I', "show-info", &report.show_full_info, "Display extended information about perf.data file"), - OPT_BOOLEAN(0, "source", &report.annotation_opts.annotate_src, + OPT_BOOLEAN(0, "source", &annotate_opts.annotate_src, "Interleave source code with assembly code (default)"), - OPT_BOOLEAN(0, "asm-raw", &report.annotation_opts.show_asm_raw, + OPT_BOOLEAN(0, "asm-raw", &annotate_opts.show_asm_raw, "Display raw encoding of assembly instructions (default)"), OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", "Specify disassembler style (e.g. 
-M intel for intel syntax)"), - OPT_STRING(0, "prefix", &report.annotation_opts.prefix, "prefix", + OPT_STRING(0, "prefix", &annotate_opts.prefix, "prefix", "Add prefix to source file path names in programs (with --prefix-strip)"), - OPT_STRING(0, "prefix-strip", &report.annotation_opts.prefix_strip, "N", + OPT_STRING(0, "prefix-strip", &annotate_opts.prefix_strip, "N", "Strip first N entries of source file path name in programs (with --prefix)"), OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, "Show a column with the sum of periods"), @@ -1401,7 +1419,7 @@ int cmd_report(int argc, const char **argv) "Time span of interest (start,stop)"), OPT_BOOLEAN(0, "inline", &symbol_conf.inline_name, "Show inline function"), - OPT_CALLBACK(0, "percent-type", &report.annotation_opts, "local-period", + OPT_CALLBACK(0, "percent-type", &annotate_opts, "local-period", "Set percent type local/global-period/hits", annotate_parse_percent_type), OPT_BOOLEAN(0, "ns", &symbol_conf.nanosecs, "Show times in nanosecs"), @@ -1426,7 +1444,14 @@ int cmd_report(int argc, const char **argv) if (ret < 0) goto exit; - annotation_options__init(&report.annotation_opts); + /* + * tasks_mode require access to exited threads to list those that are in + * the data file. Off-cpu events are synthesized after other events and + * reference exited threads. 
+ */ + symbol_conf.keep_exited_threads = true; + + annotation_options__init(); ret = perf_config(report__config, &report); if (ret) @@ -1445,13 +1470,13 @@ int cmd_report(int argc, const char **argv) } if (disassembler_style) { - report.annotation_opts.disassembler_style = strdup(disassembler_style); - if (!report.annotation_opts.disassembler_style) + annotate_opts.disassembler_style = strdup(disassembler_style); + if (!annotate_opts.disassembler_style) return -ENOMEM; } if (objdump_path) { - report.annotation_opts.objdump_path = strdup(objdump_path); - if (!report.annotation_opts.objdump_path) + annotate_opts.objdump_path = strdup(objdump_path); + if (!annotate_opts.objdump_path) return -ENOMEM; } if (addr2line_path) { @@ -1460,7 +1485,7 @@ int cmd_report(int argc, const char **argv) return -ENOMEM; } - if (annotate_check_args(&report.annotation_opts) < 0) { + if (annotate_check_args() < 0) { ret = -EINVAL; goto exit; } @@ -1615,6 +1640,16 @@ repeat: sort_order = NULL; } + if (sort_order && strstr(sort_order, "type")) { + report.data_type = true; + annotate_opts.annotate_src = false; + +#ifndef HAVE_DWARF_GETLOCATIONS_SUPPORT + pr_err("Error: Data type profiling is disabled due to missing DWARF support\n"); + goto error; +#endif + } + if (strcmp(input_name, "-") != 0) setup_browser(true); else @@ -1673,7 +1708,7 @@ repeat: * so don't allocate extra space that won't be used in the stdio * implementation. 
*/ - if (ui__has_annotation() || report.symbol_ipc || + if (ui__has_annotation() || report.symbol_ipc || report.data_type || report.total_cycles_mode) { ret = symbol__annotation_init(); if (ret < 0) @@ -1692,7 +1727,7 @@ repeat: */ symbol_conf.priv_size += sizeof(u32); } - annotation_config__init(&report.annotation_opts); + annotation_config__init(); } if (symbol__init(&session->header.env) < 0) @@ -1746,7 +1781,7 @@ error: zstd_fini(&(session->zstd_data)); perf_session__delete(session); exit: - annotation_options__exit(&report.annotation_opts); + annotation_options__exit(); free(sort_order_help); free(field_order_help); return ret; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index a3af805a1d572d101b80fefbdddd04b41413629e..5fe9abc6a52418f3b5612c8e5e38d4d052c31f98 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -653,7 +653,7 @@ static enum counter_recovery stat_handle_error(struct evsel *counter) if ((evsel__leader(counter) != counter) || !(counter->core.leader->nr_members > 1)) return COUNTER_SKIP; - } else if (evsel__fallback(counter, errno, msg, sizeof(msg))) { + } else if (evsel__fallback(counter, &target, errno, msg, sizeof(msg))) { if (verbose > 0) ui__warning("%s\n", msg); return COUNTER_RETRY; @@ -1204,8 +1204,9 @@ static struct option stat_options[] = { OPT_STRING('C', "cpu", &target.cpu_list, "cpu", "list of cpus to monitor in system-wide"), OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode, - "disable CPU count aggregation", AGGR_NONE), - OPT_BOOLEAN(0, "no-merge", &stat_config.no_merge, "Do not merge identical named events"), + "disable aggregation across CPUs or PMUs", AGGR_NONE), + OPT_SET_UINT(0, "no-merge", &stat_config.aggr_mode, + "disable aggregation the same as -A or -no-aggr", AGGR_NONE), OPT_BOOLEAN(0, "hybrid-merge", &stat_config.hybrid_merge, "Merge identical named hybrid events"), OPT_STRING('x', "field-separator", &stat_config.csv_sep, "separator", @@ -1255,7 +1256,7 @@ static struct option 
stat_options[] = { OPT_BOOLEAN(0, "metric-no-merge", &stat_config.metric_no_merge, "don't try to share events between metrics in a group"), OPT_BOOLEAN(0, "metric-no-threshold", &stat_config.metric_no_threshold, - "don't try to share events between metrics in a group "), + "disable adding events for the metric threshold calculation"), OPT_BOOLEAN(0, "topdown", &topdown_run, "measure top-down statistics"), OPT_UINTEGER(0, "td-level", &stat_config.topdown_level, @@ -1316,7 +1317,7 @@ static int cpu__get_cache_id_from_map(struct perf_cpu cpu, char *map) * be the first online CPU in the cache domain else use the * first online CPU of the cache domain as the ID. */ - if (perf_cpu_map__empty(cpu_map)) + if (perf_cpu_map__has_any_cpu_or_is_empty(cpu_map)) id = cpu.cpu; else id = perf_cpu_map__cpu(cpu_map, 0).cpu; @@ -1622,7 +1623,7 @@ static int perf_stat_init_aggr_mode(void) * taking the highest cpu number to be the size of * the aggregation translate cpumap. */ - if (!perf_cpu_map__empty(evsel_list->core.user_requested_cpus)) + if (!perf_cpu_map__has_any_cpu_or_is_empty(evsel_list->core.user_requested_cpus)) nr = perf_cpu_map__max(evsel_list->core.user_requested_cpus).cpu; else nr = 0; @@ -2289,7 +2290,7 @@ int process_stat_config_event(struct perf_session *session, perf_event__read_stat_config(&stat_config, &event->stat_config); - if (perf_cpu_map__empty(st->cpus)) { + if (perf_cpu_map__has_any_cpu_or_is_empty(st->cpus)) { if (st->aggr_mode != AGGR_UNSET) pr_warning("warning: processing task data, aggregation mode not set\n"); } else if (st->aggr_mode != AGGR_UNSET) { @@ -2695,15 +2696,19 @@ int cmd_stat(int argc, const char **argv) */ if (metrics) { const char *pmu = parse_events_option_args.pmu_filter ?: "all"; + int ret = metricgroup__parse_groups(evsel_list, pmu, metrics, + stat_config.metric_no_group, + stat_config.metric_no_merge, + stat_config.metric_no_threshold, + stat_config.user_requested_cpu_list, + stat_config.system_wide, + &stat_config.metric_events); - 
metricgroup__parse_groups(evsel_list, pmu, metrics, - stat_config.metric_no_group, - stat_config.metric_no_merge, - stat_config.metric_no_threshold, - stat_config.user_requested_cpu_list, - stat_config.system_wide, - &stat_config.metric_events); zfree(&metrics); + if (ret) { + status = ret; + goto out; + } } if (add_default_attributes()) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index ea8c7eca5eeedd7616976139b288b3ff5c8c95d4..baf1ab083436e3f980157cb5d3646d6ccc59a40c 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -147,7 +147,7 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he) return err; } - err = symbol__annotate(&he->ms, evsel, &top->annotation_opts, NULL); + err = symbol__annotate(&he->ms, evsel, NULL); if (err == 0) { top->sym_filter_entry = he; } else { @@ -261,9 +261,9 @@ static void perf_top__show_details(struct perf_top *top) goto out_unlock; printf("Showing %s for %s\n", evsel__name(top->sym_evsel), symbol->name); - printf(" Events Pcnt (>=%d%%)\n", top->annotation_opts.min_pcnt); + printf(" Events Pcnt (>=%d%%)\n", annotate_opts.min_pcnt); - more = symbol__annotate_printf(&he->ms, top->sym_evsel, &top->annotation_opts); + more = symbol__annotate_printf(&he->ms, top->sym_evsel); if (top->evlist->enabled) { if (top->zero) @@ -450,7 +450,7 @@ static void perf_top__print_mapped_keys(struct perf_top *top) fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", top->count_filter); - fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", top->annotation_opts.min_pcnt); + fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", annotate_opts.min_pcnt); fprintf(stdout, "\t[s] annotate symbol. 
\t(%s)\n", name?: "NULL"); fprintf(stdout, "\t[S] stop annotation.\n"); @@ -553,7 +553,7 @@ static bool perf_top__handle_keypress(struct perf_top *top, int c) prompt_integer(&top->count_filter, "Enter display event count filter"); break; case 'F': - prompt_percent(&top->annotation_opts.min_pcnt, + prompt_percent(&annotate_opts.min_pcnt, "Enter details display event filter (percent)"); break; case 'K': @@ -646,8 +646,7 @@ repeat: } ret = evlist__tui_browse_hists(top->evlist, help, &hbt, top->min_percent, - &top->session->header.env, !top->record_opts.overwrite, - &top->annotation_opts); + &top->session->header.env, !top->record_opts.overwrite); if (ret == K_RELOAD) { top->zero = true; goto repeat; @@ -1027,8 +1026,8 @@ static int perf_top__start_counters(struct perf_top *top) evlist__for_each_entry(evlist, counter) { try_again: - if (evsel__open(counter, top->evlist->core.user_requested_cpus, - top->evlist->core.threads) < 0) { + if (evsel__open(counter, counter->core.cpus, + counter->core.threads) < 0) { /* * Specially handle overwrite fall back. 
@@ -1044,7 +1043,7 @@ try_again: perf_top_overwrite_fallback(top, counter)) goto try_again; - if (evsel__fallback(counter, errno, msg, sizeof(msg))) { + if (evsel__fallback(counter, &opts->target, errno, msg, sizeof(msg))) { if (verbose > 0) ui__warning("%s\n", msg); goto try_again; @@ -1241,9 +1240,9 @@ static int __cmd_top(struct perf_top *top) pthread_t thread, thread_process; int ret; - if (!top->annotation_opts.objdump_path) { + if (!annotate_opts.objdump_path) { ret = perf_env__lookup_objdump(&top->session->header.env, - &top->annotation_opts.objdump_path); + &annotate_opts.objdump_path); if (ret) return ret; } @@ -1299,6 +1298,7 @@ static int __cmd_top(struct perf_top *top) } } + evlist__uniquify_name(top->evlist); ret = perf_top__start_counters(top); if (ret) return ret; @@ -1536,9 +1536,9 @@ int cmd_top(int argc, const char **argv) "only consider symbols in these comms"), OPT_STRING(0, "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", "only consider these symbols"), - OPT_BOOLEAN(0, "source", &top.annotation_opts.annotate_src, + OPT_BOOLEAN(0, "source", &annotate_opts.annotate_src, "Interleave source code with assembly code (default)"), - OPT_BOOLEAN(0, "asm-raw", &top.annotation_opts.show_asm_raw, + OPT_BOOLEAN(0, "asm-raw", &annotate_opts.show_asm_raw, "Display raw encoding of assembly instructions (default)"), OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel, "Enable kernel symbol demangling"), @@ -1549,9 +1549,9 @@ int cmd_top(int argc, const char **argv) "addr2line binary to use for line numbers"), OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", "Specify disassembler style (e.g. 
-M intel for intel syntax)"), - OPT_STRING(0, "prefix", &top.annotation_opts.prefix, "prefix", + OPT_STRING(0, "prefix", &annotate_opts.prefix, "prefix", "Add prefix to source file path names in programs (with --prefix-strip)"), - OPT_STRING(0, "prefix-strip", &top.annotation_opts.prefix_strip, "N", + OPT_STRING(0, "prefix-strip", &annotate_opts.prefix_strip, "N", "Strip first N entries of source file path name in programs (with --prefix)"), OPT_STRING('u', "uid", &target->uid_str, "user", "user to profile"), OPT_CALLBACK(0, "percent-limit", &top, "percent", @@ -1609,10 +1609,10 @@ int cmd_top(int argc, const char **argv) if (status < 0) return status; - annotation_options__init(&top.annotation_opts); + annotation_options__init(); - top.annotation_opts.min_pcnt = 5; - top.annotation_opts.context = 4; + annotate_opts.min_pcnt = 5; + annotate_opts.context = 4; top.evlist = evlist__new(); if (top.evlist == NULL) @@ -1642,13 +1642,13 @@ int cmd_top(int argc, const char **argv) usage_with_options(top_usage, options); if (disassembler_style) { - top.annotation_opts.disassembler_style = strdup(disassembler_style); - if (!top.annotation_opts.disassembler_style) + annotate_opts.disassembler_style = strdup(disassembler_style); + if (!annotate_opts.disassembler_style) return -ENOMEM; } if (objdump_path) { - top.annotation_opts.objdump_path = strdup(objdump_path); - if (!top.annotation_opts.objdump_path) + annotate_opts.objdump_path = strdup(objdump_path); + if (!annotate_opts.objdump_path) return -ENOMEM; } if (addr2line_path) { @@ -1661,7 +1661,7 @@ int cmd_top(int argc, const char **argv) if (status) goto out_delete_evlist; - if (annotate_check_args(&top.annotation_opts) < 0) + if (annotate_check_args() < 0) goto out_delete_evlist; if (!top.evlist->core.nr_entries) { @@ -1787,7 +1787,7 @@ int cmd_top(int argc, const char **argv) if (status < 0) goto out_delete_evlist; - annotation_config__init(&top.annotation_opts); + annotation_config__init(); symbol_conf.try_vmlinux_path 
= (symbol_conf.vmlinux_name == NULL); status = symbol__init(NULL); @@ -1840,7 +1840,7 @@ int cmd_top(int argc, const char **argv) out_delete_evlist: evlist__delete(top.evlist); perf_session__delete(top.session); - annotation_options__exit(&top.annotation_opts); + annotation_options__exit(); return status; } diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index e541d0e2777ab935a6274d0a8aeaa46ffe3f8247..109b8e64fe69ae32fee0ee7b6937d260d32b6203 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2470,9 +2470,8 @@ static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sam static const char *errno_to_name(struct evsel *evsel, int err) { struct perf_env *env = evsel__env(evsel); - const char *arch_name = perf_env__arch(env); - return arch_syscalls__strerrno(arch_name, err); + return perf_env__arch_strerrno(env, err); } static int trace__sys_exit(struct trace *trace, struct evsel *evsel, @@ -4264,12 +4263,11 @@ static size_t thread__dump_stats(struct thread_trace *ttrace, printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct); if (trace->errno_summary && stats->nr_failures) { - const char *arch_name = perf_env__arch(trace->host->env); int e; for (e = 0; e < stats->max_errno; ++e) { if (stats->errnos[e] != 0) - fprintf(fp, "\t\t\t\t%s: %d\n", arch_syscalls__strerrno(arch_name, e + 1), stats->errnos[e]); + fprintf(fp, "\t\t\t\t%s: %d\n", perf_env__arch_strerrno(trace->host->env, e + 1), stats->errnos[e]); } } } diff --git a/tools/perf/perf-archive.sh b/tools/perf/perf-archive.sh old mode 100644 new mode 100755 index 133f0eddbcc4604e6731cd53aa62979398f36bd9..f94795794b3614a90be78701ac47ac0a3e824a15 --- a/tools/perf/perf-archive.sh +++ b/tools/perf/perf-archive.sh @@ -4,8 +4,73 @@ # Arnaldo Carvalho de Melo PERF_DATA=perf.data -if [ $# -ne 0 ] ; then - PERF_DATA=$1 +PERF_SYMBOLS=perf.symbols +PERF_ALL=perf.all +ALL=0 +UNPACK=0 + +while [ $# -gt 0 ] ; do + if [ $1 == "--all" ]; then + ALL=1 + shift + elif [ $1 
== "--unpack" ]; then + UNPACK=1 + shift + else + PERF_DATA=$1 + UNPACK_TAR=$1 + shift + fi +done + +if [ $UNPACK -eq 1 ]; then + if [ ! -z "$UNPACK_TAR" ]; then # tar given as an argument + if [ ! -e "$UNPACK_TAR" ]; then + echo "Provided file $UNPACK_TAR does not exist" + exit 1 + fi + TARGET="$UNPACK_TAR" + else # search for perf tar in the current directory + TARGET=`find . -regex "\./perf.*\.tar\.bz2"` + TARGET_NUM=`echo -n "$TARGET" | grep -c '^'` + + if [ -z "$TARGET" -o $TARGET_NUM -gt 1 ]; then + echo -e "Error: $TARGET_NUM files found for unpacking:\n$TARGET" + echo "Provide the requested file as an argument" + exit 1 + else + echo "Found target file for unpacking: $TARGET" + fi + fi + + if [[ "$TARGET" =~ (\./)?$PERF_ALL.*.tar.bz2 ]]; then # perf tar generated by --all option + TAR_CONTENTS=`tar tvf "$TARGET" | tr -s " " | cut -d " " -f 6` + VALID_TAR=`echo "$TAR_CONTENTS" | grep "$PERF_SYMBOLS.tar.bz2" | wc -l` # check if it contains a sub-tar perf.symbols + if [ $VALID_TAR -ne 1 ]; then + echo "Error: $TARGET file is not valid (contains zero or multiple sub-tar files with debug symbols)" + exit 1 + fi + + INTERSECT=`comm -12 <(ls) <(echo "$TAR_CONTENTS") | tr "\n" " "` # check for overwriting + if [ ! -z "$INTERSECT" ]; then # prompt if file(s) already exist in the current directory + echo "File(s) ${INTERSECT::-1} already exist in the current directory." + while true; do + read -p 'Do you wish to overwrite them? 
' yn + case $yn in + [Yy]* ) break;; + [Nn]* ) exit 1;; + * ) echo "Please answer yes or no.";; + esac + done + fi + + # unzip the perf.data file in the current working directory and debug symbols in ~/.debug directory + tar xvf $TARGET && tar xvf $PERF_SYMBOLS.tar.bz2 -C ~/.debug + + else # perf tar generated by perf archive (contains only debug symbols) + tar xvf $TARGET -C ~/.debug + fi + exit 0 fi # @@ -39,9 +104,18 @@ while read build_id ; do echo ${filename#$PERF_BUILDID_LINKDIR} >> $MANIFEST done -tar cjf $PERF_DATA.tar.bz2 -C $PERF_BUILDID_DIR -T $MANIFEST -rm $MANIFEST $BUILDIDS || true +if [ $ALL -eq 1 ]; then # pack perf.data file together with tar containing debug symbols + HOSTNAME=$(hostname) + DATE=$(date '+%Y%m%d-%H%M%S') + tar cjf $PERF_SYMBOLS.tar.bz2 -C $PERF_BUILDID_DIR -T $MANIFEST + tar cjf $PERF_ALL-$HOSTNAME-$DATE.tar.bz2 $PERF_DATA $PERF_SYMBOLS.tar.bz2 + rm $PERF_SYMBOLS.tar.bz2 $MANIFEST $BUILDIDS || true +else # pack only the debug symbols + tar cjf $PERF_DATA.tar.bz2 -C $PERF_BUILDID_DIR -T $MANIFEST + rm $MANIFEST $BUILDIDS || true +fi + echo -e "Now please run:\n" -echo -e "$ tar xvf $PERF_DATA.tar.bz2 -C ~/.debug\n" -echo "wherever you need to run 'perf report' on." +echo -e "$ perf archive --unpack\n" +echo "or unpack the tar manually wherever you need to run 'perf report' on." 
exit 0 diff --git a/tools/perf/perf.c b/tools/perf/perf.c index d3fc8090413c8c289b55c006cde3ad3388708a9d..921bee0a643707ec596620b00052cd5989f4f56e 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -39,6 +39,7 @@ #include static int use_pager = -1; +static FILE *debug_fp = NULL; struct cmd_struct { const char *cmd; @@ -162,6 +163,19 @@ static void commit_pager_choice(void) } } +static int set_debug_file(const char *path) +{ + debug_fp = fopen(path, "w"); + if (!debug_fp) { + fprintf(stderr, "Open debug file '%s' failed: %s\n", + path, strerror(errno)); + return -1; + } + + debug_set_file(debug_fp); + return 0; +} + struct option options[] = { OPT_ARGUMENT("help", "help"), OPT_ARGUMENT("version", "version"), @@ -174,6 +188,7 @@ struct option options[] = { OPT_ARGUMENT("list-cmds", "list-cmds"), OPT_ARGUMENT("list-opts", "list-opts"), OPT_ARGUMENT("debug", "debug"), + OPT_ARGUMENT("debug-file", "debug-file"), OPT_END() }; @@ -287,6 +302,18 @@ static int handle_options(const char ***argv, int *argc, int *envchanged) (*argv)++; (*argc)--; + } else if (!strcmp(cmd, "--debug-file")) { + if (*argc < 2) { + fprintf(stderr, "No path given for --debug-file.\n"); + usage(perf_usage_string); + } + + if (set_debug_file((*argv)[1])) + usage(perf_usage_string); + + (*argv)++; + (*argc)--; + } else { fprintf(stderr, "Unknown option: %s\n", cmd); usage(perf_usage_string); @@ -547,5 +574,8 @@ int main(int argc, const char **argv) fprintf(stderr, "Failed to run command '%s': %s\n", cmd, str_error_r(errno, sbuf, sizeof(sbuf))); out: + if (debug_fp) + fclose(debug_fp); + return 1; } diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/core-imp-def.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/core-imp-def.json index 88b23b85e33cd0784eeca99e1994250a6509b2fc..879ff21e0b177c6a015dccd0ab4f016787deb01d 100644 --- a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/core-imp-def.json +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/core-imp-def.json @@ 
-110,7 +110,7 @@ { "PublicDescription": "Flushes due to memory hazards", "EventCode": "0x121", - "EventName": "BPU_FLUSH_MEM_FAULT", + "EventName": "GPC_FLUSH_MEM_FAULT", "BriefDescription": "Flushes due to memory hazards" }, { diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/branch.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/branch.json new file mode 100644 index 0000000000000000000000000000000000000000..a632755fc08695da42d4635a14d9e14a2d5ac9bf --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/branch.json @@ -0,0 +1,125 @@ +[ + { + "ArchStdEvent": "BR_IMMED_SPEC" + }, + { + "ArchStdEvent": "BR_RETURN_SPEC" + }, + { + "ArchStdEvent": "BR_INDIRECT_SPEC" + }, + { + "ArchStdEvent": "BR_MIS_PRED" + }, + { + "ArchStdEvent": "BR_PRED" + }, + { + "PublicDescription": "Instruction architecturally executed, branch not taken", + "EventCode": "0x8107", + "EventName": "BR_SKIP_RETIRED", + "BriefDescription": "Instruction architecturally executed, branch not taken" + }, + { + "PublicDescription": "Instruction architecturally executed, immediate branch taken", + "EventCode": "0x8108", + "EventName": "BR_IMMED_TAKEN_RETIRED", + "BriefDescription": "Instruction architecturally executed, immediate branch taken" + }, + { + "PublicDescription": "Instruction architecturally executed, indirect branch excluding return retired", + "EventCode": "0x810c", + "EventName": "BR_INDNR_TAKEN_RETIRED", + "BriefDescription": "Instruction architecturally executed, indirect branch excluding return retired" + }, + { + "PublicDescription": "Instruction architecturally executed, predicted immediate branch", + "EventCode": "0x8110", + "EventName": "BR_IMMED_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, predicted immediate branch" + }, + { + "PublicDescription": "Instruction architecturally executed, mispredicted immediate branch", + "EventCode": "0x8111", + "EventName": "BR_IMMED_MIS_PRED_RETIRED", + "BriefDescription": 
"Instruction architecturally executed, mispredicted immediate branch" + }, + { + "PublicDescription": "Instruction architecturally executed, predicted indirect branch", + "EventCode": "0x8112", + "EventName": "BR_IND_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, predicted indirect branch" + }, + { + "PublicDescription": "Instruction architecturally executed, mispredicted indirect branch", + "EventCode": "0x8113", + "EventName": "BR_IND_MIS_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, mispredicted indirect branch" + }, + { + "PublicDescription": "Instruction architecturally executed, predicted procedure return", + "EventCode": "0x8114", + "EventName": "BR_RETURN_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, predicted procedure return" + }, + { + "PublicDescription": "Instruction architecturally executed, mispredicted procedure return", + "EventCode": "0x8115", + "EventName": "BR_RETURN_MIS_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, mispredicted procedure return" + }, + { + "PublicDescription": "Instruction architecturally executed, predicted indirect branch excluding return", + "EventCode": "0x8116", + "EventName": "BR_INDNR_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, predicted indirect branch excluding return" + }, + { + "PublicDescription": "Instruction architecturally executed, mispredicted indirect branch excluding return", + "EventCode": "0x8117", + "EventName": "BR_INDNR_MIS_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, mispredicted indirect branch excluding return" + }, + { + "PublicDescription": "Instruction architecturally executed, predicted branch, taken", + "EventCode": "0x8118", + "EventName": "BR_TAKEN_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, predicted branch, taken" + }, + { + "PublicDescription": "Instruction architecturally 
executed, mispredicted branch, taken", + "EventCode": "0x8119", + "EventName": "BR_TAKEN_MIS_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, mispredicted branch, taken" + }, + { + "PublicDescription": "Instruction architecturally executed, predicted branch, not taken", + "EventCode": "0x811a", + "EventName": "BR_SKIP_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, predicted branch, not taken" + }, + { + "PublicDescription": "Instruction architecturally executed, mispredicted branch, not taken", + "EventCode": "0x811b", + "EventName": "BR_SKIP_MIS_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, mispredicted branch, not taken" + }, + { + "PublicDescription": "Instruction architecturally executed, predicted branch", + "EventCode": "0x811c", + "EventName": "BR_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, predicted branch" + }, + { + "PublicDescription": "Instruction architecturally executed, indirect branch", + "EventCode": "0x811d", + "EventName": "BR_IND_RETIRED", + "BriefDescription": "Instruction architecturally executed, indirect branch" + }, + { + "PublicDescription": "Branch Record captured.", + "EventCode": "0x811f", + "EventName": "BRB_FILTRATE", + "BriefDescription": "Branch Record captured." 
+ } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/bus.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/bus.json new file mode 100644 index 0000000000000000000000000000000000000000..2aeb9907831d627845aefb3b6ef481da3cfd53ba --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/bus.json @@ -0,0 +1,20 @@ +[ + { + "ArchStdEvent": "CPU_CYCLES" + }, + { + "ArchStdEvent": "BUS_CYCLES" + }, + { + "ArchStdEvent": "BUS_ACCESS_RD" + }, + { + "ArchStdEvent": "BUS_ACCESS_WR" + }, + { + "ArchStdEvent": "BUS_ACCESS" + }, + { + "ArchStdEvent": "CNT_CYCLES" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/cache.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/cache.json new file mode 100644 index 0000000000000000000000000000000000000000..c50d8e930b05ee885a9f966bbeebbedb79d4984d --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/cache.json @@ -0,0 +1,206 @@ +[ + { + "ArchStdEvent": "L1D_CACHE_RD" + }, + { + "ArchStdEvent": "L1D_CACHE_WR" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_RD" + }, + { + "ArchStdEvent": "L1D_CACHE_INVAL" + }, + { + "ArchStdEvent": "L1D_TLB_REFILL_RD" + }, + { + "ArchStdEvent": "L1D_TLB_REFILL_WR" + }, + { + "ArchStdEvent": "L2D_CACHE_RD" + }, + { + "ArchStdEvent": "L2D_CACHE_WR" + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL_RD" + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL_WR" + }, + { + "ArchStdEvent": "L2D_CACHE_WB_VICTIM" + }, + { + "ArchStdEvent": "L2D_CACHE_WB_CLEAN" + }, + { + "ArchStdEvent": "L2D_CACHE_INVAL" + }, + { + "ArchStdEvent": "L1I_CACHE_REFILL" + }, + { + "ArchStdEvent": "L1I_TLB_REFILL" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL" + }, + { + "ArchStdEvent": "L1D_CACHE" + }, + { + "ArchStdEvent": "L1D_TLB_REFILL" + }, + { + "ArchStdEvent": "L1I_CACHE" + }, + { + "ArchStdEvent": "L2D_CACHE" + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL" + }, + { + "ArchStdEvent": "L2D_CACHE_WB" + }, + { + "ArchStdEvent": "L1D_TLB" + }, + { + "ArchStdEvent": 
"L1I_TLB" + }, + { + "ArchStdEvent": "L2D_TLB_REFILL" + }, + { + "ArchStdEvent": "L2I_TLB_REFILL" + }, + { + "ArchStdEvent": "L2D_TLB" + }, + { + "ArchStdEvent": "L2I_TLB" + }, + { + "ArchStdEvent": "DTLB_WALK" + }, + { + "ArchStdEvent": "ITLB_WALK" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_WR" + }, + { + "ArchStdEvent": "L1D_CACHE_LMISS_RD" + }, + { + "ArchStdEvent": "L1I_CACHE_LMISS" + }, + { + "ArchStdEvent": "L2D_CACHE_LMISS_RD" + }, + { + "PublicDescription": "Level 1 data or unified cache demand access", + "EventCode": "0x8140", + "EventName": "L1D_CACHE_RW", + "BriefDescription": "Level 1 data or unified cache demand access" + }, + { + "PublicDescription": "Level 1 data or unified cache preload or prefetch", + "EventCode": "0x8142", + "EventName": "L1D_CACHE_PRFM", + "BriefDescription": "Level 1 data or unified cache preload or prefetch" + }, + { + "PublicDescription": "Level 1 data or unified cache refill, preload or prefetch", + "EventCode": "0x8146", + "EventName": "L1D_CACHE_REFILL_PRFM", + "BriefDescription": "Level 1 data or unified cache refill, preload or prefetch" + }, + { + "ArchStdEvent": "L1D_TLB_RD" + }, + { + "ArchStdEvent": "L1D_TLB_WR" + }, + { + "ArchStdEvent": "L2D_TLB_REFILL_RD" + }, + { + "ArchStdEvent": "L2D_TLB_REFILL_WR" + }, + { + "ArchStdEvent": "L2D_TLB_RD" + }, + { + "ArchStdEvent": "L2D_TLB_WR" + }, + { + "PublicDescription": "L1D TLB miss", + "EventCode": "0xD600", + "EventName": "L1D_TLB_MISS", + "BriefDescription": "L1D TLB miss" + }, + { + "PublicDescription": "Level 1 prefetcher, load prefetch requests generated", + "EventCode": "0xd606", + "EventName": "L1_PREFETCH_LD_GEN", + "BriefDescription": "Level 1 prefetcher, load prefetch requests generated" + }, + { + "PublicDescription": "Level 1 prefetcher, load prefetch fills into the level 1 cache", + "EventCode": "0xd607", + "EventName": "L1_PREFETCH_LD_FILL", + "BriefDescription": "Level 1 prefetcher, load prefetch fills into the level 1 cache" + }, + { + 
"PublicDescription": "Level 1 prefetcher, load prefetch to level 2 generated", + "EventCode": "0xd608", + "EventName": "L1_PREFETCH_L2_REQ", + "BriefDescription": "Level 1 prefetcher, load prefetch to level 2 generated" + }, + { + "PublicDescription": "L1 prefetcher, distance was reset", + "EventCode": "0xd609", + "EventName": "L1_PREFETCH_DIST_RST", + "BriefDescription": "L1 prefetcher, distance was reset" + }, + { + "PublicDescription": "L1 prefetcher, distance was increased", + "EventCode": "0xd60a", + "EventName": "L1_PREFETCH_DIST_INC", + "BriefDescription": "L1 prefetcher, distance was increased" + }, + { + "PublicDescription": "Level 1 prefetcher, table entry is trained", + "EventCode": "0xd60b", + "EventName": "L1_PREFETCH_ENTRY_TRAINED", + "BriefDescription": "Level 1 prefetcher, table entry is trained" + }, + { + "PublicDescription": "L1 data cache refill - Read or Write", + "EventCode": "0xd60e", + "EventName": "L1D_CACHE_REFILL_RW", + "BriefDescription": "L1 data cache refill - Read or Write" + }, + { + "PublicDescription": "Level 2 cache refill from instruction-side miss, including IMMU refills", + "EventCode": "0xD701", + "EventName": "L2C_INST_REFILL", + "BriefDescription": "Level 2 cache refill from instruction-side miss, including IMMU refills" + }, + { + "PublicDescription": "Level 2 cache refill from data-side miss, including DMMU refills", + "EventCode": "0xD702", + "EventName": "L2C_DATA_REFILL", + "BriefDescription": "Level 2 cache refill from data-side miss, including DMMU refills" + }, + { + "PublicDescription": "Level 2 cache prefetcher, load prefetch requests generated", + "EventCode": "0xD703", + "EventName": "L2_PREFETCH_REQ", + "BriefDescription": "Level 2 cache prefetcher, load prefetch requests generated" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/core-imp-def.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/core-imp-def.json new file mode 100644 index 
0000000000000000000000000000000000000000..eb5a2208d26048c9cf5c63352bd78db48ff7cff8 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/core-imp-def.json @@ -0,0 +1,464 @@ +[ + { + "PublicDescription": "Level 2 prefetch requests, refilled to L2 cache", + "EventCode": "0x10A", + "EventName": "L2_PREFETCH_REFILL", + "BriefDescription": "Level 2 prefetch requests, refilled to L2 cache" + }, + { + "PublicDescription": "Level 2 prefetch requests, late", + "EventCode": "0x10B", + "EventName": "L2_PREFETCH_UPGRADE", + "BriefDescription": "Level 2 prefetch requests, late" + }, + { + "PublicDescription": "Predictable branch speculatively executed that hit any level of BTB", + "EventCode": "0x110", + "EventName": "BPU_HIT_BTB", + "BriefDescription": "Predictable branch speculatively executed that hit any level of BTB" + }, + { + "PublicDescription": "Predictable conditional branch speculatively executed that hit any level of BTB", + "EventCode": "0x111", + "EventName": "BPU_CONDITIONAL_BRANCH_HIT_BTB", + "BriefDescription": "Predictable conditional branch speculatively executed that hit any level of BTB" + }, + { + "PublicDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the indirect predictor", + "EventCode": "0x112", + "EventName": "BPU_HIT_INDIRECT_PREDICTOR", + "BriefDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the indirect predictor" + }, + { + "PublicDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the return predictor", + "EventCode": "0x113", + "EventName": "BPU_HIT_RSB", + "BriefDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the return predictor" + }, + { + "PublicDescription": "Predictable unconditional branch speculatively executed that did not hit any level of BTB", + "EventCode": "0x114", + "EventName":
"BPU_UNCONDITIONAL_BRANCH_MISS_BTB", + "BriefDescription": "Predictable unconditional branch speculatively executed that did not hit any level of BTB" + }, + { + "PublicDescription": "Predictable branch speculatively executed, unpredicted", + "EventCode": "0x115", + "EventName": "BPU_BRANCH_NO_HIT", + "BriefDescription": "Predictable branch speculatively executed, unpredicted" + }, + { + "PublicDescription": "Predictable branch speculatively executed that hit any level of BTB that mispredict", + "EventCode": "0x116", + "EventName": "BPU_HIT_BTB_AND_MISPREDICT", + "BriefDescription": "Predictable branch speculatively executed that hit any level of BTB that mispredict" + }, + { + "PublicDescription": "Predictable conditional branch speculatively executed that hit any level of BTB that (direction) mispredict", + "EventCode": "0x117", + "EventName": "BPU_CONDITIONAL_BRANCH_HIT_BTB_AND_MISPREDICT", + "BriefDescription": "Predictable conditional branch speculatively executed that hit any level of BTB that (direction) mispredict" + }, + { + "PublicDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the indirect predictor that mispredict", + "EventCode": "0x118", + "EventName": "BPU_INDIRECT_BRANCH_HIT_BTB_AND_MISPREDICT", + "BriefDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the indirect predictor that mispredict" + }, + { + "PublicDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the return predictor that mispredict", + "EventCode": "0x119", + "EventName": "BPU_HIT_RSB_AND_MISPREDICT", + "BriefDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the return predictor that mispredict" + }, + { + "PublicDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the overflow/underflow return predictor that mispredict", + "EventCode":
"0x11a", + "EventName": "BPU_MISS_RSB_AND_MISPREDICT", + "BriefDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the overflow/underflow return predictor that mispredict" + }, + { + "PublicDescription": "Predictable branch speculatively executed, unpredicted, that mispredict", + "EventCode": "0x11b", + "EventName": "BPU_NO_PREDICTION_MISPREDICT", + "BriefDescription": "Predictable branch speculatively executed, unpredicted, that mispredict" + }, + { + "PublicDescription": "Predictable branch update the BTB region buffer entry", + "EventCode": "0x11c", + "EventName": "BPU_BTB_UPDATE", + "BriefDescription": "Predictable branch update the BTB region buffer entry" + }, + { + "PublicDescription": "Count predict pipe stalls due to speculative return address predictor full", + "EventCode": "0x11d", + "EventName": "BPU_RSB_FULL_STALL", + "BriefDescription": "Count predict pipe stalls due to speculative return address predictor full" + }, + { + "PublicDescription": "Macro-ops speculatively decoded", + "EventCode": "0x11f", + "EventName": "ICF_INST_SPEC_DECODE", + "BriefDescription": "Macro-ops speculatively decoded" + }, + { + "PublicDescription": "Flushes", + "EventCode": "0x120", + "EventName": "GPC_FLUSH", + "BriefDescription": "Flushes" + }, + { + "PublicDescription": "Flushes due to memory hazards", + "EventCode": "0x121", + "EventName": "GPC_FLUSH_MEM_FAULT", + "BriefDescription": "Flushes due to memory hazards" + }, + { + "PublicDescription": "ETM extout bit 0", + "EventCode": "0x141", + "EventName": "MSC_ETM_EXTOUT0", + "BriefDescription": "ETM extout bit 0" + }, + { + "PublicDescription": "ETM extout bit 1", + "EventCode": "0x142", + "EventName": "MSC_ETM_EXTOUT1", + "BriefDescription": "ETM extout bit 1" + }, + { + "PublicDescription": "ETM extout bit 2", + "EventCode": "0x143", + "EventName": "MSC_ETM_EXTOUT2", + "BriefDescription": "ETM extout bit 2" + }, + { + "PublicDescription": "ETM extout bit 3", +
"EventCode": "0x144", + "EventName": "MSC_ETM_EXTOUT3", + "BriefDescription": "ETM extout bit 3" + }, + { + "PublicDescription": "Bus request sn", + "EventCode": "0x156", + "EventName": "L2C_SNOOP", + "BriefDescription": "Bus request sn" + }, + { + "PublicDescription": "L2 TXDAT LCRD blocked", + "EventCode": "0x169", + "EventName": "L2C_DAT_CRD_STALL", + "BriefDescription": "L2 TXDAT LCRD blocked" + }, + { + "PublicDescription": "L2 TXRSP LCRD blocked", + "EventCode": "0x16a", + "EventName": "L2C_RSP_CRD_STALL", + "BriefDescription": "L2 TXRSP LCRD blocked" + }, + { + "PublicDescription": "L2 TXREQ LCRD blocked", + "EventCode": "0x16b", + "EventName": "L2C_REQ_CRD_STALL", + "BriefDescription": "L2 TXREQ LCRD blocked" + }, + { + "PublicDescription": "Early mispredict", + "EventCode": "0xD100", + "EventName": "ICF_EARLY_MIS_PRED", + "BriefDescription": "Early mispredict" + }, + { + "PublicDescription": "FEQ full cycles", + "EventCode": "0xD101", + "EventName": "ICF_FEQ_FULL", + "BriefDescription": "FEQ full cycles" + }, + { + "PublicDescription": "Instruction FIFO Full", + "EventCode": "0xD102", + "EventName": "ICF_INST_FIFO_FULL", + "BriefDescription": "Instruction FIFO Full" + }, + { + "PublicDescription": "L1I TLB miss", + "EventCode": "0xD103", + "EventName": "L1I_TLB_MISS", + "BriefDescription": "L1I TLB miss" + }, + { + "PublicDescription": "ICF sent 0 instructions to IDR this cycle", + "EventCode": "0xD104", + "EventName": "ICF_STALL", + "BriefDescription": "ICF sent 0 instructions to IDR this cycle" + }, + { + "PublicDescription": "PC FIFO Full", + "EventCode": "0xD105", + "EventName": "ICF_PC_FIFO_FULL", + "BriefDescription": "PC FIFO Full" + }, + { + "PublicDescription": "Stall due to BOB ID", + "EventCode": "0xD200", + "EventName": "IDR_STALL_BOB_ID", + "BriefDescription": "Stall due to BOB ID" + }, + { + "PublicDescription": "Dispatch stall due to LOB entries", + "EventCode": "0xD201", + "EventName": "IDR_STALL_LOB_ID", + "BriefDescription": "Dispatch 
stall due to LOB entries" + }, + { + "PublicDescription": "Dispatch stall due to SOB entries", + "EventCode": "0xD202", + "EventName": "IDR_STALL_SOB_ID", + "BriefDescription": "Dispatch stall due to SOB entries" + }, + { + "PublicDescription": "Dispatch stall due to IXU scheduler entries", + "EventCode": "0xD203", + "EventName": "IDR_STALL_IXU_SCHED", + "BriefDescription": "Dispatch stall due to IXU scheduler entries" + }, + { + "PublicDescription": "Dispatch stall due to FSU scheduler entries", + "EventCode": "0xD204", + "EventName": "IDR_STALL_FSU_SCHED", + "BriefDescription": "Dispatch stall due to FSU scheduler entries" + }, + { + "PublicDescription": "Dispatch stall due to ROB entries", + "EventCode": "0xD205", + "EventName": "IDR_STALL_ROB_ID", + "BriefDescription": "Dispatch stall due to ROB entries" + }, + { + "PublicDescription": "Dispatch stall due to flush", + "EventCode": "0xD206", + "EventName": "IDR_STALL_FLUSH", + "BriefDescription": "Dispatch stall due to flush" + }, + { + "PublicDescription": "Dispatch stall due to WFI", + "EventCode": "0xD207", + "EventName": "IDR_STALL_WFI", + "BriefDescription": "Dispatch stall due to WFI" + }, + { + "PublicDescription": "Number of SWOB drains triggered by timeout", + "EventCode": "0xD208", + "EventName": "IDR_STALL_SWOB_TIMEOUT", + "BriefDescription": "Number of SWOB drains triggered by timeout" + }, + { + "PublicDescription": "Number of SWOB drains triggered by system register or special-purpose register read-after-write or specific special-purpose register writes that cause SWOB drain", + "EventCode": "0xD209", + "EventName": "IDR_STALL_SWOB_RAW", + "BriefDescription": "Number of SWOB drains triggered by system register or special-purpose register read-after-write or specific special-purpose register writes that cause SWOB drain" + }, + { + "PublicDescription": "Number of SWOB drains triggered by system register write when SWOB full", + "EventCode": "0xD20A", + "EventName": "IDR_STALL_SWOB_FULL", + 
"BriefDescription": "Number of SWOB drains triggered by system register write when SWOB full" + }, + { + "PublicDescription": "Dispatch stall due to L1 instruction cache miss", + "EventCode": "0xD20B", + "EventName": "STALL_FRONTEND_CACHE", + "BriefDescription": "Dispatch stall due to L1 instruction cache miss" + }, + { + "PublicDescription": "Dispatch stall due to L1 data cache miss", + "EventCode": "0xD20D", + "EventName": "STALL_BACKEND_CACHE", + "BriefDescription": "Dispatch stall due to L1 data cache miss" + }, + { + "PublicDescription": "Dispatch stall due to lack of any core resource", + "EventCode": "0xD20F", + "EventName": "STALL_BACKEND_RESOURCE", + "BriefDescription": "Dispatch stall due to lack of any core resource" + }, + { + "PublicDescription": "Instructions issued by the scheduler", + "EventCode": "0xD300", + "EventName": "IXU_NUM_UOPS_ISSUED", + "BriefDescription": "Instructions issued by the scheduler" + }, + { + "PublicDescription": "Any uop issued was canceled for any reason", + "EventCode": "0xD301", + "EventName": "IXU_ISSUE_CANCEL", + "BriefDescription": "Any uop issued was canceled for any reason" + }, + { + "PublicDescription": "A load wakeup to the scheduler has been canceled", + "EventCode": "0xD302", + "EventName": "IXU_LOAD_CANCEL", + "BriefDescription": "A load wakeup to the scheduler has been canceled" + }, + { + "PublicDescription": "The scheduler had to cancel one slow Uop due to resource conflict", + "EventCode": "0xD303", + "EventName": "IXU_SLOW_CANCEL", + "BriefDescription": "The scheduler had to cancel one slow Uop due to resource conflict" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXA", + "EventCode": "0xD304", + "EventName": "IXU_IXA_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXA" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXA Par 0", + "EventCode": "0xD305", + "EventName": "IXU_IXA_PAR0_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXA Par 0" 
+ }, + { + "PublicDescription": "Uops issued by the scheduler on IXA Par 1", + "EventCode": "0xD306", + "EventName": "IXU_IXA_PAR1_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXA Par 1" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXB", + "EventCode": "0xD307", + "EventName": "IXU_IXB_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXB" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXB Par 0", + "EventCode": "0xD308", + "EventName": "IXU_IXB_PAR0_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXB Par 0" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXB Par 1", + "EventCode": "0xD309", + "EventName": "IXU_IXB_PAR1_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXB Par 1" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXC", + "EventCode": "0xD30A", + "EventName": "IXU_IXC_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXC" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXC Par 0", + "EventCode": "0xD30B", + "EventName": "IXU_IXC_PAR0_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXC Par 0" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXC Par 1", + "EventCode": "0xD30C", + "EventName": "IXU_IXC_PAR1_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXC Par 1" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXD", + "EventCode": "0xD30D", + "EventName": "IXU_IXD_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXD" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXD Par 0", + "EventCode": "0xD30E", + "EventName": "IXU_IXD_PAR0_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXD Par 0" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXD Par 1", + "EventCode": "0xD30F", + "EventName": "IXU_IXD_PAR1_ISSUED", + "BriefDescription": "Uops issued by the scheduler on 
IXD Par 1" + }, + { + "PublicDescription": "Uops issued by the FSU scheduler", + "EventCode": "0xD400", + "EventName": "FSU_ISSUED", + "BriefDescription": "Uops issued by the FSU scheduler" + }, + { + "PublicDescription": "Uops issued by the scheduler on FSX", + "EventCode": "0xD401", + "EventName": "FSU_FSX_ISSUED", + "BriefDescription": "Uops issued by the scheduler on FSX" + }, + { + "PublicDescription": "Uops issued by the scheduler on FSY", + "EventCode": "0xD402", + "EventName": "FSU_FSY_ISSUED", + "BriefDescription": "Uops issued by the scheduler on FSY" + }, + { + "PublicDescription": "Uops issued by the scheduler on FSZ", + "EventCode": "0xD403", + "EventName": "FSU_FSZ_ISSUED", + "BriefDescription": "Uops issued by the scheduler on FSZ" + }, + { + "PublicDescription": "Uops canceled (load cancels)", + "EventCode": "0xD404", + "EventName": "FSU_CANCEL", + "BriefDescription": "Uops canceled (load cancels)" + }, + { + "PublicDescription": "Count scheduler stalls due to divide/sqrt", + "EventCode": "0xD405", + "EventName": "FSU_DIV_SQRT_STALL", + "BriefDescription": "Count scheduler stalls due to divide/sqrt" + }, + { + "PublicDescription": "Number of SWOB drains", + "EventCode": "0xD500", + "EventName": "GPC_SWOB_DRAIN", + "BriefDescription": "Number of SWOB drains" + }, + { + "PublicDescription": "GPC detected a Breakpoint instruction match", + "EventCode": "0xD501", + "EventName": "BREAKPOINT_MATCH", + "BriefDescription": "GPC detected a Breakpoint instruction match" + }, + { + "PublicDescription": "Core progress monitor triggered", + "EventCode": "0xd502", + "EventName": "GPC_CPM_TRIGGER", + "BriefDescription": "Core progress monitor triggered" + }, + { + "PublicDescription": "Fill buffer full", + "EventCode": "0xD601", + "EventName": "OFB_FULL", + "BriefDescription": "Fill buffer full" + }, + { + "PublicDescription": "Load satisfied from store forwarded data", + "EventCode": "0xD605", + "EventName": "LD_FROM_ST_FWD", + "BriefDescription": "Load
satisfied from store forwarded data" + }, + { + "PublicDescription": "Store retirement pipe stall", + "EventCode": "0xD60C", + "EventName": "LSU_ST_RETIRE_STALL", + "BriefDescription": "Store retirement pipe stall" + }, + { + "PublicDescription": "LSU detected a Watchpoint data match", + "EventCode": "0xD60D", + "EventName": "WATCHPOINT_MATCH", + "BriefDescription": "LSU detected a Watchpoint data match" + }, + { + "PublicDescription": "Counts cycles that MSC is telling GPC to stall commit due to ETM ISTALL feature", + "EventCode": "0xda00", + "EventName": "MSC_ETM_COMMIT_STALL", + "BriefDescription": "Counts cycles that MSC is telling GPC to stall commit due to ETM ISTALL feature" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/exception.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/exception.json new file mode 100644 index 0000000000000000000000000000000000000000..bd59ba7b74e42f118445bde613bc620f47d1776d --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/exception.json @@ -0,0 +1,47 @@ +[ + { + "ArchStdEvent": "EXC_UNDEF" + }, + { + "ArchStdEvent": "EXC_SVC" + }, + { + "ArchStdEvent": "EXC_PABORT" + }, + { + "ArchStdEvent": "EXC_DABORT" + }, + { + "ArchStdEvent": "EXC_IRQ" + }, + { + "ArchStdEvent": "EXC_FIQ" + }, + { + "ArchStdEvent": "EXC_HVC" + }, + { + "ArchStdEvent": "EXC_TRAP_PABORT" + }, + { + "ArchStdEvent": "EXC_TRAP_DABORT" + }, + { + "ArchStdEvent": "EXC_TRAP_OTHER" + }, + { + "ArchStdEvent": "EXC_TRAP_IRQ" + }, + { + "ArchStdEvent": "EXC_TRAP_FIQ" + }, + { + "ArchStdEvent": "EXC_TAKEN" + }, + { + "ArchStdEvent": "EXC_RETURN" + }, + { + "ArchStdEvent": "EXC_SMC" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/instruction.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/instruction.json new file mode 100644 index 0000000000000000000000000000000000000000..a6a20f541e33389e4176b28a3eaac413e7b9a9b3 --- /dev/null +++
b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/instruction.json @@ -0,0 +1,128 @@ +[ + { + "ArchStdEvent": "SW_INCR" + }, + { + "ArchStdEvent": "ST_RETIRED" + }, + { + "ArchStdEvent": "LD_SPEC" + }, + { + "ArchStdEvent": "ST_SPEC" + }, + { + "ArchStdEvent": "LDST_SPEC" + }, + { + "ArchStdEvent": "DP_SPEC" + }, + { + "ArchStdEvent": "ASE_SPEC" + }, + { + "ArchStdEvent": "VFP_SPEC" + }, + { + "ArchStdEvent": "PC_WRITE_SPEC" + }, + { + "ArchStdEvent": "BR_IMMED_RETIRED" + }, + { + "ArchStdEvent": "BR_RETURN_RETIRED" + }, + { + "ArchStdEvent": "CRYPTO_SPEC" + }, + { + "ArchStdEvent": "ISB_SPEC" + }, + { + "ArchStdEvent": "DSB_SPEC" + }, + { + "ArchStdEvent": "DMB_SPEC" + }, + { + "ArchStdEvent": "RC_LD_SPEC" + }, + { + "ArchStdEvent": "RC_ST_SPEC" + }, + { + "ArchStdEvent": "INST_RETIRED" + }, + { + "ArchStdEvent": "CID_WRITE_RETIRED" + }, + { + "ArchStdEvent": "PC_WRITE_RETIRED" + }, + { + "ArchStdEvent": "INST_SPEC" + }, + { + "ArchStdEvent": "TTBR_WRITE_RETIRED" + }, + { + "ArchStdEvent": "BR_RETIRED" + }, + { + "ArchStdEvent": "BR_MIS_PRED_RETIRED" + }, + { + "ArchStdEvent": "OP_RETIRED" + }, + { + "ArchStdEvent": "OP_SPEC" + }, + { + "PublicDescription": "Operation speculatively executed - ASE Scalar", + "EventCode": "0xd210", + "EventName": "ASE_SCALAR_SPEC", + "BriefDescription": "Operation speculatively executed - ASE Scalar" + }, + { + "PublicDescription": "Operation speculatively executed - ASE Vector", + "EventCode": "0xd211", + "EventName": "ASE_VECTOR_SPEC", + "BriefDescription": "Operation speculatively executed - ASE Vector" + }, + { + "PublicDescription": "Barrier speculatively executed, CSDB", + "EventCode": "0x7f", + "EventName": "CSDB_SPEC", + "BriefDescription": "Barrier speculatively executed, CSDB" + }, + { + "PublicDescription": "Prefetch sent to L2.", + "EventCode": "0xd106", + "EventName": "ICF_PREFETCH_DISPATCH", + "BriefDescription": "Prefetch sent to L2." 
+ }, + { + "PublicDescription": "Prefetch response received but was dropped since we don't support inflight upgrades.", + "EventCode": "0xd107", + "EventName": "ICF_PREFETCH_DROPPED_NO_UPGRADE", + "BriefDescription": "Prefetch response received but was dropped since we don't support inflight upgrades." + }, + { + "PublicDescription": "Prefetch request missed TLB.", + "EventCode": "0xd108", + "EventName": "ICF_PREFETCH_DROPPED_TLB_MISS", + "BriefDescription": "Prefetch request missed TLB." + }, + { + "PublicDescription": "Prefetch request dropped since duplicate was found in TLB.", + "EventCode": "0xd109", + "EventName": "ICF_PREFETCH_DROPPED_DUPLICATE", + "BriefDescription": "Prefetch request dropped since duplicate was found in TLB." + }, + { + "PublicDescription": "Prefetch request dropped since it was found in cache.", + "EventCode": "0xd10a", + "EventName": "ICF_PREFETCH_DROPPED_CACHE_HIT", + "BriefDescription": "Prefetch request dropped since it was found in cache." + } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/intrinsic.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/intrinsic.json new file mode 100644 index 0000000000000000000000000000000000000000..7ecffb989ae04aefa7c8cde66f3f64b39bb88247 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/intrinsic.json @@ -0,0 +1,14 @@ +[ + { + "ArchStdEvent": "LDREX_SPEC" + }, + { + "ArchStdEvent": "STREX_PASS_SPEC" + }, + { + "ArchStdEvent": "STREX_FAIL_SPEC" + }, + { + "ArchStdEvent": "STREX_SPEC" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/memory.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/memory.json new file mode 100644 index 0000000000000000000000000000000000000000..a211d94aacde6b5e9e4c7f7fbe90b8dd507ac8cc --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/memory.json @@ -0,0 +1,41 @@ +[ + { + "ArchStdEvent": "LD_RETIRED" + }, + { + "ArchStdEvent": "MEM_ACCESS_RD" + }, + { + "ArchStdEvent": 
"MEM_ACCESS_WR" + }, + { + "ArchStdEvent": "LD_ALIGN_LAT" + }, + { + "ArchStdEvent": "ST_ALIGN_LAT" + }, + { + "ArchStdEvent": "MEM_ACCESS" + }, + { + "ArchStdEvent": "MEMORY_ERROR" + }, + { + "ArchStdEvent": "LDST_ALIGN_LAT" + }, + { + "ArchStdEvent": "MEM_ACCESS_CHECKED" + }, + { + "ArchStdEvent": "MEM_ACCESS_CHECKED_RD" + }, + { + "ArchStdEvent": "MEM_ACCESS_CHECKED_WR" + }, + { + "PublicDescription": "Flushes due to memory hazards", + "EventCode": "0x121", + "EventName": "BPU_FLUSH_MEM_FAULT", + "BriefDescription": "Flushes due to memory hazards" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/metrics.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..c5d1d22bd034b10a8220e12b4d9b917d2f6d2ffa --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/metrics.json @@ -0,0 +1,442 @@ +[ + { + "MetricName": "branch_miss_pred_rate", + "MetricExpr": "BR_MIS_PRED / BR_PRED", + "BriefDescription": "Branch predictor misprediction rate. May not count branches that are never resolved because they are in the misprediction shadow of an earlier branch", + "MetricGroup": "branch", + "ScaleUnit": "100%" + }, + { + "MetricName": "bus_utilization", + "MetricExpr": "BUS_ACCESS / (BUS_CYCLES * 1)", + "BriefDescription": "Core-to-uncore bus utilization", + "MetricGroup": "Bus", + "ScaleUnit": "100percent of bus cycles" + }, + { + "MetricName": "l1d_cache_miss_ratio", + "MetricExpr": "L1D_CACHE_REFILL / L1D_CACHE", + "BriefDescription": "This metric measures the ratio of level 1 data cache accesses missed to the total number of level 1 data cache accesses. 
This gives an indication of the effectiveness of the level 1 data cache.", + "MetricGroup": "Miss_Ratio;L1D_Cache_Effectiveness", + "ScaleUnit": "1per cache access" + }, + { + "MetricName": "l1i_cache_miss_ratio", + "MetricExpr": "L1I_CACHE_REFILL / L1I_CACHE", + "BriefDescription": "This metric measures the ratio of level 1 instruction cache accesses missed to the total number of level 1 instruction cache accesses. This gives an indication of the effectiveness of the level 1 instruction cache.", + "MetricGroup": "Miss_Ratio;L1I_Cache_Effectiveness", + "ScaleUnit": "1per cache access" + }, + { + "MetricName": "Miss_Ratio;l1d_cache_read_miss", + "MetricExpr": "L1D_CACHE_LMISS_RD / L1D_CACHE_RD", + "BriefDescription": "L1D cache read miss rate", + "MetricGroup": "Cache", + "ScaleUnit": "1per cache read access" + }, + { + "MetricName": "l2_cache_miss_ratio", + "MetricExpr": "L2D_CACHE_REFILL / L2D_CACHE", + "BriefDescription": "This metric measures the ratio of level 2 cache accesses missed to the total number of level 2 cache accesses. This gives an indication of the effectiveness of the level 2 cache, which is a unified cache that stores both data and instruction. 
Note that cache accesses in this cache are either data memory access or instruction fetch as this is a unified cache.", + "MetricGroup": "Miss_Ratio;L2_Cache_Effectiveness", + "ScaleUnit": "1per cache access" + }, + { + "MetricName": "l1i_cache_read_miss_rate", + "MetricExpr": "L1I_CACHE_LMISS / L1I_CACHE", + "BriefDescription": "L1I cache read miss rate", + "MetricGroup": "Cache", + "ScaleUnit": "1per cache access" + }, + { + "MetricName": "l2d_cache_read_miss_rate", + "MetricExpr": "L2D_CACHE_LMISS_RD / L2D_CACHE_RD", + "BriefDescription": "L2 cache read miss rate", + "MetricGroup": "Cache", + "ScaleUnit": "1per cache read access" + }, + { + "MetricName": "l1d_cache_miss_mpki", + "MetricExpr": "(L1D_CACHE_LMISS_RD * 1e3) / INST_RETIRED", + "BriefDescription": "Misses per thousand instructions (data)", + "MetricGroup": "Cache", + "ScaleUnit": "1MPKI" + }, + { + "MetricName": "l1i_cache_miss_mpki", + "MetricExpr": "(L1I_CACHE_LMISS * 1e3) / INST_RETIRED", + "BriefDescription": "Misses per thousand instructions (instruction)", + "MetricGroup": "Cache", + "ScaleUnit": "1MPKI" + }, + { + "MetricName": "simd_percentage", + "MetricExpr": "ASE_SPEC / INST_SPEC", + "BriefDescription": "This metric measures advanced SIMD operations as a percentage of total operations speculatively executed.", + "MetricGroup": "Operation_Mix", + "ScaleUnit": "100percent of operations" + }, + { + "MetricName": "crypto_percentage", + "MetricExpr": "CRYPTO_SPEC / INST_SPEC", + "BriefDescription": "This metric measures crypto operations as a percentage of operations speculatively executed.", + "MetricGroup": "Operation_Mix", + "ScaleUnit": "100percent of operations" + }, + { + "MetricName": "gflops", + "MetricExpr": "VFP_SPEC / (duration_time * 1e9)", + "BriefDescription": "Giga-floating point operations per second", + "MetricGroup": "InstructionMix" + }, + { + "MetricName": "integer_dp_percentage", + "MetricExpr": "DP_SPEC / INST_SPEC", + "BriefDescription": "This metric measures scalar 
integer operations as a percentage of operations speculatively executed.", + "MetricGroup": "Operation_Mix", + "ScaleUnit": "100percent of operations" + }, + { + "MetricName": "ipc", + "MetricExpr": "INST_RETIRED / CPU_CYCLES", + "BriefDescription": "This metric measures the number of instructions retired per cycle.", + "MetricGroup": "General", + "ScaleUnit": "1per cycle" + }, + { + "MetricName": "load_percentage", + "MetricExpr": "LD_SPEC / INST_SPEC", + "BriefDescription": "This metric measures load operations as a percentage of operations speculatively executed.", + "MetricGroup": "Operation_Mix", + "ScaleUnit": "100percent of operations" + }, + { + "MetricName": "load_store_spec_rate", + "MetricExpr": "LDST_SPEC / INST_SPEC", + "BriefDescription": "The rate of load or store instructions speculatively executed to overall instructions speculatively executed", + "MetricGroup": "Operation_Mix", + "ScaleUnit": "100percent of operations" + }, + { + "MetricName": "retired_mips", + "MetricExpr": "INST_RETIRED / (duration_time * 1e6)", + "BriefDescription": "Millions of instructions per second", + "MetricGroup": "InstructionMix" + }, + { + "MetricName": "spec_utilization_mips", + "MetricExpr": "INST_SPEC / (duration_time * 1e6)", + "BriefDescription": "Millions of instructions per second", + "MetricGroup": "PEutilization" + }, + { + "MetricName": "pc_write_spec_rate", + "MetricExpr": "PC_WRITE_SPEC / INST_SPEC", + "BriefDescription": "The rate of software change of the PC speculatively executed to overall instructions speculatively executed", + "MetricGroup": "Operation_Mix", + "ScaleUnit": "100percent of operations" + }, + { + "MetricName": "store_percentage", + "MetricExpr": "ST_SPEC / INST_SPEC", + "BriefDescription": "This metric measures store operations as a percentage of operations speculatively executed.", + "MetricGroup": "Operation_Mix", + "ScaleUnit": "100percent of operations" + }, + { + "MetricName": "scalar_fp_percentage", + "MetricExpr": "VFP_SPEC /
INST_SPEC", + "BriefDescription": "This metric measures scalar floating point operations as a percentage of operations speculatively executed.", + "MetricGroup": "Operation_Mix", + "ScaleUnit": "100percent of operations" + }, + { + "MetricName": "retired_rate", + "MetricExpr": "OP_RETIRED / OP_SPEC", + "BriefDescription": "Of all the micro-operations issued, what percentage are retired(committed)", + "MetricGroup": "General", + "ScaleUnit": "100%" + }, + { + "MetricName": "wasted", + "MetricExpr": "1 - (OP_RETIRED / (CPU_CYCLES * #slots))", + "BriefDescription": "Of all the micro-operations issued, what proportion are lost", + "MetricGroup": "General", + "ScaleUnit": "100%" + }, + { + "MetricName": "wasted_rate", + "MetricExpr": "1 - OP_RETIRED / OP_SPEC", + "BriefDescription": "Of all the micro-operations issued, what percentage are not retired(committed)", + "MetricGroup": "General", + "ScaleUnit": "100%" + }, + { + "MetricName": "stall_backend_cache_rate", + "MetricExpr": "STALL_BACKEND_CACHE / CPU_CYCLES", + "BriefDescription": "Proportion of cycles stalled and no operations issued to backend and cache miss", + "MetricGroup": "Stall", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "stall_backend_resource_rate", + "MetricExpr": "STALL_BACKEND_RESOURCE / CPU_CYCLES", + "BriefDescription": "Proportion of cycles stalled and no operations issued to backend and resource full", + "MetricGroup": "Stall", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "stall_backend_tlb_rate", + "MetricExpr": "STALL_BACKEND_TLB / CPU_CYCLES", + "BriefDescription": "Proportion of cycles stalled and no operations issued to backend and TLB miss", + "MetricGroup": "Stall", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "stall_frontend_cache_rate", + "MetricExpr": "STALL_FRONTEND_CACHE / CPU_CYCLES", + "BriefDescription": "Proportion of cycles stalled and no ops delivered from frontend and cache miss", + "MetricGroup": "Stall", + "ScaleUnit": 
"100percent of cycles" + }, + { + "MetricName": "stall_frontend_tlb_rate", + "MetricExpr": "STALL_FRONTEND_TLB / CPU_CYCLES", + "BriefDescription": "Proportion of cycles stalled and no ops delivered from frontend and TLB miss", + "MetricGroup": "Stall", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "dtlb_walk_ratio", + "MetricExpr": "DTLB_WALK / L1D_TLB", + "BriefDescription": "This metric measures the ratio of data TLB Walks to the total number of data TLB accesses. This gives an indication of the effectiveness of the data TLB accesses.", + "MetricGroup": "Miss_Ratio;DTLB_Effectiveness", + "ScaleUnit": "1per TLB access" + }, + { + "MetricName": "itlb_walk_ratio", + "MetricExpr": "ITLB_WALK / L1I_TLB", + "BriefDescription": "This metric measures the ratio of instruction TLB Walks to the total number of instruction TLB accesses. This gives an indication of the effectiveness of the instruction TLB accesses.", + "MetricGroup": "Miss_Ratio;ITLB_Effectiveness", + "ScaleUnit": "1per TLB access" + }, + { + "ArchStdEvent": "backend_bound" + }, + { + "ArchStdEvent": "frontend_bound", + "MetricExpr": "100 - (retired_fraction + slots_lost_misspeculation_fraction + backend_bound)" + }, + { + "MetricName": "slots_lost_misspeculation_fraction", + "MetricExpr": "(OP_SPEC - OP_RETIRED) / (CPU_CYCLES * #slots)", + "BriefDescription": "Fraction of slots lost due to misspeculation", + "DefaultMetricgroupName": "TopdownL1", + "MetricGroup": "Default;TopdownL1", + "ScaleUnit": "100percent of slots" + }, + { + "MetricName": "retired_fraction", + "MetricExpr": "OP_RETIRED / (CPU_CYCLES * #slots)", + "BriefDescription": "Fraction of slots retiring, useful work", + "DefaultMetricgroupName": "TopdownL1", + "MetricGroup": "Default;TopdownL1", + "ScaleUnit": "100percent of slots" + }, + { + "MetricName": "backend_core", + "MetricExpr": "(backend_bound / 100) - backend_memory", + "BriefDescription": "Fraction of slots the CPU was stalled due to backend non-memory subsystem 
issues", + "MetricGroup": "TopdownL2", + "ScaleUnit": "100%" + }, + { + "MetricName": "backend_memory", + "MetricExpr": "(STALL_BACKEND_TLB + STALL_BACKEND_CACHE) / CPU_CYCLES", + "BriefDescription": "Fraction of slots the CPU was stalled due to backend memory subsystem issues (cache/tlb miss)", + "MetricGroup": "TopdownL2", + "ScaleUnit": "100%" + }, + { + "MetricName": "branch_mispredict", + "MetricExpr": "(BR_MIS_PRED_RETIRED / GPC_FLUSH) * slots_lost_misspeculation_fraction", + "BriefDescription": "Fraction of slots lost due to branch misprediction", + "MetricGroup": "TopdownL2", + "ScaleUnit": "1percent of slots" + }, + { + "MetricName": "frontend_bandwidth", + "MetricExpr": "frontend_bound - frontend_latency", + "BriefDescription": "Fraction of slots the CPU did not dispatch at full bandwidth - able to dispatch partial slots only (1, 2, or 3 uops)", + "MetricGroup": "TopdownL2", + "ScaleUnit": "1percent of slots" + }, + { + "MetricName": "frontend_latency", + "MetricExpr": "(STALL_FRONTEND - ((STALL_SLOT_FRONTEND - ((frontend_bound / 100) * CPU_CYCLES * #slots)) / #slots)) / CPU_CYCLES", + "BriefDescription": "Fraction of slots the CPU was stalled due to frontend latency issues (cache/tlb miss); nothing to dispatch", + "MetricGroup": "TopdownL2", + "ScaleUnit": "100percent of slots" + }, + { + "MetricName": "other_miss_pred", + "MetricExpr": "slots_lost_misspeculation_fraction - branch_mispredict", + "BriefDescription": "Fraction of slots lost due to other/non-branch misprediction misspeculation", + "MetricGroup": "TopdownL2", + "ScaleUnit": "1percent of slots" + }, + { + "MetricName": "pipe_utilization", + "MetricExpr": "100 * ((IXU_NUM_UOPS_ISSUED + FSU_ISSUED) / (CPU_CYCLES * 6))", + "BriefDescription": "Fraction of execute slots utilized", + "MetricGroup": "TopdownL2", + "ScaleUnit": "1percent of slots" + }, + { + "MetricName": "d_cache_l2_miss_rate", + "MetricExpr": "STALL_BACKEND_MEM / CPU_CYCLES", + "BriefDescription": "Fraction of cycles the CPU was
stalled due to data L2 cache miss", + "MetricGroup": "TopdownL3", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "d_cache_miss_rate", + "MetricExpr": "STALL_BACKEND_CACHE / CPU_CYCLES", + "BriefDescription": "Fraction of cycles the CPU was stalled due to data cache miss", + "MetricGroup": "TopdownL3", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "d_tlb_miss_rate", + "MetricExpr": "STALL_BACKEND_TLB / CPU_CYCLES", + "BriefDescription": "Fraction of cycles the CPU was stalled due to data TLB miss", + "MetricGroup": "TopdownL3", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "fsu_pipe_utilization", + "MetricExpr": "FSU_ISSUED / (CPU_CYCLES * 2)", + "BriefDescription": "Fraction of FSU execute slots utilized", + "MetricGroup": "TopdownL3", + "ScaleUnit": "100percent of slots" + }, + { + "MetricName": "i_cache_miss_rate", + "MetricExpr": "STALL_FRONTEND_CACHE / CPU_CYCLES", + "BriefDescription": "Fraction of cycles the CPU was stalled due to instruction cache miss", + "MetricGroup": "TopdownL3", + "ScaleUnit": "100percent of slots" + }, + { + "MetricName": "i_tlb_miss_rate", + "MetricExpr": "STALL_FRONTEND_TLB / CPU_CYCLES", + "BriefDescription": "Fraction of cycles the CPU was stalled due to instruction TLB miss", + "MetricGroup": "TopdownL3", + "ScaleUnit": "100percent of slots" + }, + { + "MetricName": "ixu_pipe_utilization", + "MetricExpr": "IXU_NUM_UOPS_ISSUED / (CPU_CYCLES * #slots)", + "BriefDescription": "Fraction of IXU execute slots utilized", + "MetricGroup": "TopdownL3", + "ScaleUnit": "100percent of slots" + }, + { + "MetricName": "stall_recovery_rate", + "MetricExpr": "IDR_STALL_FLUSH / CPU_CYCLES", + "BriefDescription": "Fraction of cycles the CPU was stalled due to flush recovery", + "MetricGroup": "TopdownL3", + "ScaleUnit": "100percent of slots" + }, + { + "MetricName": "stall_fsu_sched_rate", + "MetricExpr": "IDR_STALL_FSU_SCHED / CPU_CYCLES", + "BriefDescription": "Fraction of cycles the CPU was 
stalled and FSU was full", + "MetricGroup": "TopdownL4", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "stall_ixu_sched_rate", + "MetricExpr": "IDR_STALL_IXU_SCHED / CPU_CYCLES", + "BriefDescription": "Fraction of cycles the CPU was stalled and IXU was full", + "MetricGroup": "TopdownL4", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "stall_lob_id_rate", + "MetricExpr": "IDR_STALL_LOB_ID / CPU_CYCLES", + "BriefDescription": "Fraction of cycles the CPU was stalled and LOB was full", + "MetricGroup": "TopdownL4", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "stall_rob_id_rate", + "MetricExpr": "IDR_STALL_ROB_ID / CPU_CYCLES", + "BriefDescription": "Fraction of cycles the CPU was stalled and ROB was full", + "MetricGroup": "TopdownL4", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "stall_sob_id_rate", + "MetricExpr": "IDR_STALL_SOB_ID / CPU_CYCLES", + "BriefDescription": "Fraction of cycles the CPU was stalled and SOB was full", + "MetricGroup": "TopdownL4", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "l1d_cache_access_demand", + "MetricExpr": "L1D_CACHE_RW / L1D_CACHE", + "BriefDescription": "L1D cache access - demand", + "MetricGroup": "Cache", + "ScaleUnit": "100percent of cache acceses" + }, + { + "MetricName": "l1d_cache_access_prefetces", + "MetricExpr": "L1D_CACHE_PRFM / L1D_CACHE", + "BriefDescription": "L1D cache access - prefetch", + "MetricGroup": "Cache", + "ScaleUnit": "100percent of cache acceses" + }, + { + "MetricName": "l1d_cache_demand_misses", + "MetricExpr": "L1D_CACHE_REFILL_RW / L1D_CACHE", + "BriefDescription": "L1D cache demand misses", + "MetricGroup": "Cache", + "ScaleUnit": "100percent of cache acceses" + }, + { + "MetricName": "l1d_cache_demand_misses_read", + "MetricExpr": "L1D_CACHE_REFILL_RD / L1D_CACHE", + "BriefDescription": "L1D cache demand misses - read", + "MetricGroup": "Cache", + "ScaleUnit": "100percent of cache acceses" + }, + { + 
"MetricName": "l1d_cache_demand_misses_write", + "MetricExpr": "L1D_CACHE_REFILL_WR / L1D_CACHE", + "BriefDescription": "L1D cache demand misses - write", + "MetricGroup": "Cache", + "ScaleUnit": "100percent of cache acceses" + }, + { + "MetricName": "l1d_cache_prefetch_misses", + "MetricExpr": "L1D_CACHE_REFILL_PRFM / L1D_CACHE", + "BriefDescription": "L1D cache prefetch misses", + "MetricGroup": "Cache", + "ScaleUnit": "100percent of cache acceses" + }, + { + "MetricName": "ase_scalar_mix", + "MetricExpr": "ASE_SCALAR_SPEC / OP_SPEC", + "BriefDescription": "Proportion of advanced SIMD data processing operations (excluding DP_SPEC/LD_SPEC) scalar operations", + "MetricGroup": "Instructions", + "ScaleUnit": "100percent of cache acceses" + }, + { + "MetricName": "ase_vector_mix", + "MetricExpr": "ASE_VECTOR_SPEC / OP_SPEC", + "BriefDescription": "Proportion of advanced SIMD data processing operations (excluding DP_SPEC/LD_SPEC) vector operations", + "MetricGroup": "Instructions", + "ScaleUnit": "100percent of cache acceses" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/mmu.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/mmu.json new file mode 100644 index 0000000000000000000000000000000000000000..66d83b680651eb823877d831f51955044f60a38a --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/mmu.json @@ -0,0 +1,170 @@ +[ + { + "PublicDescription": "Level 2 data translation buffer allocation", + "EventCode": "0xD800", + "EventName": "MMU_D_OTB_ALLOC", + "BriefDescription": "Level 2 data translation buffer allocation" + }, + { + "PublicDescription": "Data TLB translation cache hit on S1L2 walk cache entry", + "EventCode": "0xd801", + "EventName": "MMU_D_TRANS_CACHE_HIT_S1L2_WALK", + "BriefDescription": "Data TLB translation cache hit on S1L2 walk cache entry" + }, + { + "PublicDescription": "Data TLB translation cache hit on S1L1 walk cache entry", + "EventCode": "0xd802", + "EventName": 
"MMU_D_TRANS_CACHE_HIT_S1L1_WALK", + "BriefDescription": "Data TLB translation cache hit on S1L1 walk cache entry" + }, + { + "PublicDescription": "Data TLB translation cache hit on S1L0 walk cache entry", + "EventCode": "0xd803", + "EventName": "MMU_D_TRANS_CACHE_HIT_S1L0_WALK", + "BriefDescription": "Data TLB translation cache hit on S1L0 walk cache entry" + }, + { + "PublicDescription": "Data TLB translation cache hit on S2L2 walk cache entry", + "EventCode": "0xd804", + "EventName": "MMU_D_TRANS_CACHE_HIT_S2L2_WALK", + "BriefDescription": "Data TLB translation cache hit on S2L2 walk cache entry" + }, + { + "PublicDescription": "Data TLB translation cache hit on S2L1 walk cache entry", + "EventCode": "0xd805", + "EventName": "MMU_D_TRANS_CACHE_HIT_S2L1_WALK", + "BriefDescription": "Data TLB translation cache hit on S2L1 walk cache entry" + }, + { + "PublicDescription": "Data TLB translation cache hit on S2L0 walk cache entry", + "EventCode": "0xd806", + "EventName": "MMU_D_TRANS_CACHE_HIT_S2L0_WALK", + "BriefDescription": "Data TLB translation cache hit on S2L0 walk cache entry" + }, + { + "PublicDescription": "Data-side S1 page walk cache lookup", + "EventCode": "0xd807", + "EventName": "MMU_D_S1_WALK_CACHE_LOOKUP", + "BriefDescription": "Data-side S1 page walk cache lookup" + }, + { + "PublicDescription": "Data-side S1 page walk cache refill", + "EventCode": "0xd808", + "EventName": "MMU_D_S1_WALK_CACHE_REFILL", + "BriefDescription": "Data-side S1 page walk cache refill" + }, + { + "PublicDescription": "Data-side S2 page walk cache lookup", + "EventCode": "0xd809", + "EventName": "MMU_D_S2_WALK_CACHE_LOOKUP", + "BriefDescription": "Data-side S2 page walk cache lookup" + }, + { + "PublicDescription": "Data-side S2 page walk cache refill", + "EventCode": "0xd80a", + "EventName": "MMU_D_S2_WALK_CACHE_REFILL", + "BriefDescription": "Data-side S2 page walk cache refill" + }, + { + "PublicDescription": "Data-side S1 table walk fault", + "EventCode": "0xD80B", +
"EventName": "MMU_D_S1_WALK_FAULT", + "BriefDescription": "Data-side S1 table walk fault" + }, + { + "PublicDescription": "Data-side S2 table walk fault", + "EventCode": "0xD80C", + "EventName": "MMU_D_S2_WALK_FAULT", + "BriefDescription": "Data-side S2 table walk fault" + }, + { + "PublicDescription": "Data-side table walk steps or descriptor fetches", + "EventCode": "0xD80D", + "EventName": "MMU_D_WALK_STEPS", + "BriefDescription": "Data-side table walk steps or descriptor fetches" + }, + { + "PublicDescription": "Level 2 instruction translation buffer allocation", + "EventCode": "0xD900", + "EventName": "MMU_I_OTB_ALLOC", + "BriefDescription": "Level 2 instruction translation buffer allocation" + }, + { + "PublicDescription": "Instruction TLB translation cache hit on S1L2 walk cache entry", + "EventCode": "0xd901", + "EventName": "MMU_I_TRANS_CACHE_HIT_S1L2_WALK", + "BriefDescription": "Instruction TLB translation cache hit on S1L2 walk cache entry" + }, + { + "PublicDescription": "Instruction TLB translation cache hit on S1L1 walk cache entry", + "EventCode": "0xd902", + "EventName": "MMU_I_TRANS_CACHE_HIT_S1L1_WALK", + "BriefDescription": "Instruction TLB translation cache hit on S1L1 walk cache entry" + }, + { + "PublicDescription": "Instruction TLB translation cache hit on S1L0 walk cache entry", + "EventCode": "0xd903", + "EventName": "MMU_I_TRANS_CACHE_HIT_S1L0_WALK", + "BriefDescription": "Instruction TLB translation cache hit on S1L0 walk cache entry" + }, + { + "PublicDescription": "Instruction TLB translation cache hit on S2L2 walk cache entry", + "EventCode": "0xd904", + "EventName": "MMU_I_TRANS_CACHE_HIT_S2L2_WALK", + "BriefDescription": "Instruction TLB translation cache hit on S2L2 walk cache entry" + }, + { + "PublicDescription": "Instruction TLB translation cache hit on S2L1 walk cache entry", + "EventCode": "0xd905", + "EventName": "MMU_I_TRANS_CACHE_HIT_S2L1_WALK", + "BriefDescription": "Instruction TLB translation cache hit on S2L1 walk cache
entry" + }, + { + "PublicDescription": "Instruction TLB translation cache hit on S2L0 walk cache entry", + "EventCode": "0xd906", + "EventName": "MMU_I_TRANS_CACHE_HIT_S2L0_WALK", + "BriefDescription": "Instruction TLB translation cache hit on S2L0 walk cache entry" + }, + { + "PublicDescription": "Instruction-side S1 page walk cache lookup", + "EventCode": "0xd907", + "EventName": "MMU_I_S1_WALK_CACHE_LOOKUP", + "BriefDescription": "Instruction-side S1 page walk cache lookup" + }, + { + "PublicDescription": "Instruction-side S1 page walk cache refill", + "EventCode": "0xd908", + "EventName": "MMU_I_S1_WALK_CACHE_REFILL", + "BriefDescription": "Instruction-side S1 page walk cache refill" + }, + { + "PublicDescription": "Instruction-side S2 page walk cache lookup", + "EventCode": "0xd909", + "EventName": "MMU_I_S2_WALK_CACHE_LOOKUP", + "BriefDescription": "Instruction-side S2 page walk cache lookup" + }, + { + "PublicDescription": "Instruction-side S2 page walk cache refill", + "EventCode": "0xd90a", + "EventName": "MMU_I_S2_WALK_CACHE_REFILL", + "BriefDescription": "Instruction-side S2 page walk cache refill" + }, + { + "PublicDescription": "Instruction-side S1 table walk fault", + "EventCode": "0xD90B", + "EventName": "MMU_I_S1_WALK_FAULT", + "BriefDescription": "Instruction-side S1 table walk fault" + }, + { + "PublicDescription": "Instruction-side S2 table walk fault", + "EventCode": "0xD90C", + "EventName": "MMU_I_S2_WALK_FAULT", + "BriefDescription": "Instruction-side S2 table walk fault" + }, + { + "PublicDescription": "Instruction-side table walk steps or descriptor fetches", + "EventCode": "0xD90D", + "EventName": "MMU_I_WALK_STEPS", + "BriefDescription": "Instruction-side table walk steps or descriptor fetches" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/pipeline.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/pipeline.json new file mode 100644 index
0000000000000000000000000000000000000000..2fb2d1f183fc7795189c04198ecb5f085c754d12 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/pipeline.json @@ -0,0 +1,41 @@ +[ + { + "ArchStdEvent": "STALL_FRONTEND", + "Errata": "Errata AC03_CPU_29", + "BriefDescription": "Impacted by errata, use metrics instead -" + }, + { + "ArchStdEvent": "STALL_BACKEND" + }, + { + "ArchStdEvent": "STALL", + "Errata": "Errata AC03_CPU_29", + "BriefDescription": "Impacted by errata, use metrics instead -" + }, + { + "ArchStdEvent": "STALL_SLOT_BACKEND" + }, + { + "ArchStdEvent": "STALL_SLOT_FRONTEND", + "Errata": "Errata AC03_CPU_29", + "BriefDescription": "Impacted by errata, use metrics instead -" + }, + { + "ArchStdEvent": "STALL_SLOT" + }, + { + "ArchStdEvent": "STALL_BACKEND_MEM" + }, + { + "PublicDescription": "Frontend stall cycles, TLB", + "EventCode": "0x815c", + "EventName": "STALL_FRONTEND_TLB", + "BriefDescription": "Frontend stall cycles, TLB" + }, + { + "PublicDescription": "Backend stall cycles, TLB", + "EventCode": "0x8167", + "EventName": "STALL_BACKEND_TLB", + "BriefDescription": "Backend stall cycles, TLB" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/spe.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/spe.json new file mode 100644 index 0000000000000000000000000000000000000000..20f2165c85fec581226bc153571e13809e18104b --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/spe.json @@ -0,0 +1,14 @@ +[ + { + "ArchStdEvent": "SAMPLE_POP" + }, + { + "ArchStdEvent": "SAMPLE_FEED" + }, + { + "ArchStdEvent": "SAMPLE_FILTRATE" + }, + { + "ArchStdEvent": "SAMPLE_COLLISION" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/cmn/sys/cmn.json b/tools/perf/pmu-events/arch/arm64/arm/cmn/sys/cmn.json index 428605c37d10bcb5aef284aaa5b8279085c0002d..5ec157c39f0df134412ec65c694cdc08297e78e5 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/cmn/sys/cmn.json +++ 
b/tools/perf/pmu-events/arch/arm64/arm/cmn/sys/cmn.json @@ -107,7 +107,7 @@ "EventName": "hnf_qos_hh_retry", "EventidCode": "0xe", "NodeType": "0x5", - "BriefDescription": "Counts number of times a HighHigh priority request is protocolretried at the HN‑F.", + "BriefDescription": "Counts number of times a HighHigh priority request is protocolretried at the HN-F.", "Unit": "arm_cmn", "Compat": "(434|436|43c|43a).*" }, diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv index 5b58db5032c11fad8c1dd7e7cf4fd8124772aafd..f4d1ca4d1493ddb68ef1cdb9097732d5163e9f4c 100644 --- a/tools/perf/pmu-events/arch/arm64/mapfile.csv +++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv @@ -42,3 +42,4 @@ 0x00000000480fd010,v1,hisilicon/hip08,core 0x00000000500f0000,v1,ampere/emag,core 0x00000000c00fac30,v1,ampere/ampereone,core +0x00000000c00fac40,v1,ampere/ampereonex,core diff --git a/tools/perf/pmu-events/arch/powerpc/mapfile.csv b/tools/perf/pmu-events/arch/powerpc/mapfile.csv index f4908af7ad66b48b4da10e1f47fe1b6ab23dc77f..599a588dbeb40070fec7e3dd2c721edcb73dbb09 100644 --- a/tools/perf/pmu-events/arch/powerpc/mapfile.csv +++ b/tools/perf/pmu-events/arch/powerpc/mapfile.csv @@ -11,8 +11,7 @@ # # Multiple PVRs could map to a single JSON file. 
# - -# Power8 entries 0x004[bcd][[:xdigit:]]{4},1,power8,core +0x0066[[:xdigit:]]{4},1,power8,core 0x004e[[:xdigit:]]{4},1,power9,core 0x0080[[:xdigit:]]{4},1,power10,core diff --git a/tools/perf/pmu-events/arch/powerpc/power10/datasource.json b/tools/perf/pmu-events/arch/powerpc/power10/datasource.json index 6b0356f2d301384e9e29a29ef34f4526f4d01c14..0eeaaf1a95b863bac3772f4a47018f6574861316 100644 --- a/tools/perf/pmu-events/arch/powerpc/power10/datasource.json +++ b/tools/perf/pmu-events/arch/powerpc/power10/datasource.json @@ -99,6 +99,11 @@ "EventName": "PM_INST_FROM_L2MISS", "BriefDescription": "The processor's instruction cache was reloaded from a source beyond the local core's L2 due to a demand miss." }, + { + "EventCode": "0x0003C0000000C040", + "EventName": "PM_DATA_FROM_L2MISS_DSRC", + "BriefDescription": "The processor's L1 data cache was reloaded from a source beyond the local core's L2 due to a demand miss." + }, { "EventCode": "0x000380000010C040", "EventName": "PM_INST_FROM_L2MISS_ALL", @@ -161,9 +166,14 @@ }, { "EventCode": "0x000780000000C040", - "EventName": "PM_INST_FROM_L3MISS", + "EventName": "PM_INST_FROM_L3MISS_DSRC", "BriefDescription": "The processor's instruction cache was reloaded from beyond the local core's L3 due to a demand miss." }, + { + "EventCode": "0x0007C0000000C040", + "EventName": "PM_DATA_FROM_L3MISS_DSRC", + "BriefDescription": "The processor's L1 data cache was reloaded from beyond the local core's L3 due to a demand miss." + }, { "EventCode": "0x000780000010C040", "EventName": "PM_INST_FROM_L3MISS_ALL", @@ -981,7 +991,7 @@ }, { "EventCode": "0x0003C0000000C142", - "EventName": "PM_MRK_DATA_FROM_L2MISS", + "EventName": "PM_MRK_DATA_FROM_L2MISS_DSRC", "BriefDescription": "The processor's L1 data cache was reloaded from a source beyond the local core's L2 due to a demand miss for a marked instruction." 
}, { @@ -1046,12 +1056,12 @@ }, { "EventCode": "0x000780000000C142", - "EventName": "PM_MRK_INST_FROM_L3MISS", + "EventName": "PM_MRK_INST_FROM_L3MISS_DSRC", "BriefDescription": "The processor's instruction cache was reloaded from beyond the local core's L3 due to a demand miss for a marked instruction." }, { "EventCode": "0x0007C0000000C142", - "EventName": "PM_MRK_DATA_FROM_L3MISS", + "EventName": "PM_MRK_DATA_FROM_L3MISS_DSRC", "BriefDescription": "The processor's L1 data cache was reloaded from beyond the local core's L3 due to a demand miss for a marked instruction." }, { diff --git a/tools/perf/pmu-events/arch/riscv/mapfile.csv b/tools/perf/pmu-events/arch/riscv/mapfile.csv index c61b3d6ef6166a906164c0fe2732199b96843621..cfc449b198105ebe5004c0565de85499ff14f319 100644 --- a/tools/perf/pmu-events/arch/riscv/mapfile.csv +++ b/tools/perf/pmu-events/arch/riscv/mapfile.csv @@ -15,3 +15,5 @@ # #MVENDORID-MARCHID-MIMPID,Version,Filename,EventType 0x489-0x8000000000000007-0x[[:xdigit:]]+,v1,sifive/u74,core +0x5b7-0x0-0x0,v1,thead/c900-legacy,core +0x67e-0x80000000db0000[89]0-0x[[:xdigit:]]+,v1,starfive/dubhe-80,core diff --git a/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/common.json b/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/common.json new file mode 100644 index 0000000000000000000000000000000000000000..fbffcacb2aceb5a18f68d4abc712a6306018f170 --- /dev/null +++ b/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/common.json @@ -0,0 +1,172 @@ +[ + { + "EventName": "ACCESS_MMU_STLB", + "EventCode": "0x1", + "BriefDescription": "access MMU STLB" + }, + { + "EventName": "MISS_MMU_STLB", + "EventCode": "0x2", + "BriefDescription": "miss MMU STLB" + }, + { + "EventName": "ACCESS_MMU_PTE_C", + "EventCode": "0x3", + "BriefDescription": "access MMU PTE-Cache" + }, + { + "EventName": "MISS_MMU_PTE_C", + "EventCode": "0x4", + "BriefDescription": "miss MMU PTE-Cache" + }, + { + "EventName": "ROB_FLUSH", + "EventCode": "0x5", + "BriefDescription": "ROB flush 
(all kinds of exceptions)" + }, + { + "EventName": "BTB_PREDICTION_MISS", + "EventCode": "0x6", + "BriefDescription": "BTB prediction miss" + }, + { + "EventName": "ITLB_MISS", + "EventCode": "0x7", + "BriefDescription": "ITLB miss" + }, + { + "EventName": "SYNC_DEL_FETCH_G", + "EventCode": "0x8", + "BriefDescription": "SYNC delivery a fetch-group" + }, + { + "EventName": "ICACHE_MISS", + "EventCode": "0x9", + "BriefDescription": "ICache miss" + }, + { + "EventName": "BPU_BR_RETIRE", + "EventCode": "0xA", + "BriefDescription": "condition branch instruction retire" + }, + { + "EventName": "BPU_BR_MISS", + "EventCode": "0xB", + "BriefDescription": "condition branch instruction miss" + }, + { + "EventName": "RET_INS_RETIRE", + "EventCode": "0xC", + "BriefDescription": "return instruction retire" + }, + { + "EventName": "RET_INS_MISS", + "EventCode": "0xD", + "BriefDescription": "return instruction miss" + }, + { + "EventName": "INDIRECT_JR_MISS", + "EventCode": "0xE", + "BriefDescription": "indirect JR instruction miss (include without target)" + }, + { + "EventName": "IBUF_VAL_ID_NORDY", + "EventCode": "0xF", + "BriefDescription": "IBUF valid while ID not ready" + }, + { + "EventName": "IBUF_NOVAL_ID_RDY", + "EventCode": "0x10", + "BriefDescription": "IBUF not valid while ID ready" + }, + { + "EventName": "REN_INT_PHY_REG_NORDY", + "EventCode": "0x11", + "BriefDescription": "REN integer physical register file is not ready" + }, + { + "EventName": "REN_FP_PHY_REG_NORDY", + "EventCode": "0x12", + "BriefDescription": "REN floating point physical register file is not ready" + }, + { + "EventName": "REN_CP_NORDY", + "EventCode": "0x13", + "BriefDescription": "REN checkpoint is not ready" + }, + { + "EventName": "DEC_VAL_ROB_NORDY", + "EventCode": "0x14", + "BriefDescription": "DEC is valid and ROB is not ready" + }, + { + "EventName": "OOD_FLUSH_LS_DEP", + "EventCode": "0x15", + "BriefDescription": "out of order flush due to load/store dependency" + }, + { + "EventName":
"BRU_RET_IJR_INS", + "EventCode": "0x16", + "BriefDescription": "BRU retire an IJR instruction" + }, + { + "EventName": "ACCESS_DTLB", + "EventCode": "0x17", + "BriefDescription": "access DTLB" + }, + { + "EventName": "MISS_DTLB", + "EventCode": "0x18", + "BriefDescription": "miss DTLB" + }, + { + "EventName": "LOAD_INS_DCACHE", + "EventCode": "0x19", + "BriefDescription": "load instruction access DCache" + }, + { + "EventName": "LOAD_INS_MISS_DCACHE", + "EventCode": "0x1A", + "BriefDescription": "load instruction miss DCache" + }, + { + "EventName": "STORE_INS_DCACHE", + "EventCode": "0x1B", + "BriefDescription": "store/amo instruction access DCache" + }, + { + "EventName": "STORE_INS_MISS_DCACHE", + "EventCode": "0x1C", + "BriefDescription": "store/amo instruction miss DCache" + }, + { + "EventName": "LOAD_SCACHE", + "EventCode": "0x1D", + "BriefDescription": "load access SCache" + }, + { + "EventName": "STORE_SCACHE", + "EventCode": "0x1E", + "BriefDescription": "store access SCache" + }, + { + "EventName": "LOAD_MISS_SCACHE", + "EventCode": "0x1F", + "BriefDescription": "load miss SCache" + }, + { + "EventName": "STORE_MISS_SCACHE", + "EventCode": "0x20", + "BriefDescription": "store miss SCache" + }, + { + "EventName": "L2C_PF_REQ", + "EventCode": "0x21", + "BriefDescription": "L2C data-prefetcher request" + }, + { + "EventName": "L2C_PF_HIT", + "EventCode": "0x22", + "BriefDescription": "L2C data-prefetcher hit" + } +] diff --git a/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/firmware.json b/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/firmware.json new file mode 100644 index 0000000000000000000000000000000000000000..9b4a032186a7b1c41e342e1bf054bf8e4751c110 --- /dev/null +++ b/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/firmware.json @@ -0,0 +1,68 @@ +[ + { + "ArchStdEvent": "FW_MISALIGNED_LOAD" + }, + { + "ArchStdEvent": "FW_MISALIGNED_STORE" + }, + { + "ArchStdEvent": "FW_ACCESS_LOAD" + }, + { + "ArchStdEvent": "FW_ACCESS_STORE" + }, + { 
+ "ArchStdEvent": "FW_ILLEGAL_INSN" + }, + { + "ArchStdEvent": "FW_SET_TIMER" + }, + { + "ArchStdEvent": "FW_IPI_SENT" + }, + { + "ArchStdEvent": "FW_IPI_RECEIVED" + }, + { + "ArchStdEvent": "FW_FENCE_I_SENT" + }, + { + "ArchStdEvent": "FW_FENCE_I_RECEIVED" + }, + { + "ArchStdEvent": "FW_SFENCE_VMA_SENT" + }, + { + "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" + }, + { + "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" + }, + { + "ArchStdEvent": "FW_SFENCE_VMA_ASID_RECEIVED" + }, + { + "ArchStdEvent": "FW_HFENCE_GVMA_SENT" + }, + { + "ArchStdEvent": "FW_HFENCE_GVMA_RECEIVED" + }, + { + "ArchStdEvent": "FW_HFENCE_GVMA_VMID_SENT" + }, + { + "ArchStdEvent": "FW_HFENCE_GVMA_VMID_RECEIVED" + }, + { + "ArchStdEvent": "FW_HFENCE_VVMA_SENT" + }, + { + "ArchStdEvent": "FW_HFENCE_VVMA_RECEIVED" + }, + { + "ArchStdEvent": "FW_HFENCE_VVMA_ASID_SENT" + }, + { + "ArchStdEvent": "FW_HFENCE_VVMA_ASID_RECEIVED" + } +] diff --git a/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/cache.json b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/cache.json new file mode 100644 index 0000000000000000000000000000000000000000..2b142348d6359208064bcfb825ad796a0b233a3e --- /dev/null +++ b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/cache.json @@ -0,0 +1,67 @@ +[ + { + "EventName": "L1_ICACHE_ACCESS", + "EventCode": "0x00000001", + "BriefDescription": "L1 instruction cache access" + }, + { + "EventName": "L1_ICACHE_MISS", + "EventCode": "0x00000002", + "BriefDescription": "L1 instruction cache miss" + }, + { + "EventName": "ITLB_MISS", + "EventCode": "0x00000003", + "BriefDescription": "I-UTLB miss" + }, + { + "EventName": "DTLB_MISS", + "EventCode": "0x00000004", + "BriefDescription": "D-UTLB miss" + }, + { + "EventName": "JTLB_MISS", + "EventCode": "0x00000005", + "BriefDescription": "JTLB miss" + }, + { + "EventName": "L1_DCACHE_READ_ACCESS", + "EventCode": "0x0000000c", + "BriefDescription": "L1 data cache read access" + }, + { + "EventName": "L1_DCACHE_READ_MISS", + "EventCode": 
"0x0000000d", + "BriefDescription": "L1 data cache read miss" + }, + { + "EventName": "L1_DCACHE_WRITE_ACCESS", + "EventCode": "0x0000000e", + "BriefDescription": "L1 data cache write access" + }, + { + "EventName": "L1_DCACHE_WRITE_MISS", + "EventCode": "0x0000000f", + "BriefDescription": "L1 data cache write miss" + }, + { + "EventName": "LL_CACHE_READ_ACCESS", + "EventCode": "0x00000010", + "BriefDescription": "LL Cache read access" + }, + { + "EventName": "LL_CACHE_READ_MISS", + "EventCode": "0x00000011", + "BriefDescription": "LL Cache read miss" + }, + { + "EventName": "LL_CACHE_WRITE_ACCESS", + "EventCode": "0x00000012", + "BriefDescription": "LL Cache write access" + }, + { + "EventName": "LL_CACHE_WRITE_MISS", + "EventCode": "0x00000013", + "BriefDescription": "LL Cache write miss" + } +] diff --git a/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/firmware.json b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/firmware.json new file mode 100644 index 0000000000000000000000000000000000000000..9b4a032186a7b1c41e342e1bf054bf8e4751c110 --- /dev/null +++ b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/firmware.json @@ -0,0 +1,68 @@ +[ + { + "ArchStdEvent": "FW_MISALIGNED_LOAD" + }, + { + "ArchStdEvent": "FW_MISALIGNED_STORE" + }, + { + "ArchStdEvent": "FW_ACCESS_LOAD" + }, + { + "ArchStdEvent": "FW_ACCESS_STORE" + }, + { + "ArchStdEvent": "FW_ILLEGAL_INSN" + }, + { + "ArchStdEvent": "FW_SET_TIMER" + }, + { + "ArchStdEvent": "FW_IPI_SENT" + }, + { + "ArchStdEvent": "FW_IPI_RECEIVED" + }, + { + "ArchStdEvent": "FW_FENCE_I_SENT" + }, + { + "ArchStdEvent": "FW_FENCE_I_RECEIVED" + }, + { + "ArchStdEvent": "FW_SFENCE_VMA_SENT" + }, + { + "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" + }, + { + "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" + }, + { + "ArchStdEvent": "FW_SFENCE_VMA_ASID_RECEIVED" + }, + { + "ArchStdEvent": "FW_HFENCE_GVMA_SENT" + }, + { + "ArchStdEvent": "FW_HFENCE_GVMA_RECEIVED" + }, + { + "ArchStdEvent": "FW_HFENCE_GVMA_VMID_SENT" + }, + { + 
"ArchStdEvent": "FW_HFENCE_GVMA_VMID_RECEIVED" + }, + { + "ArchStdEvent": "FW_HFENCE_VVMA_SENT" + }, + { + "ArchStdEvent": "FW_HFENCE_VVMA_RECEIVED" + }, + { + "ArchStdEvent": "FW_HFENCE_VVMA_ASID_SENT" + }, + { + "ArchStdEvent": "FW_HFENCE_VVMA_ASID_RECEIVED" + } +] diff --git a/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/instruction.json b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/instruction.json new file mode 100644 index 0000000000000000000000000000000000000000..c822b5373333935b2e56a75b25ffe73aa703fcdc --- /dev/null +++ b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/instruction.json @@ -0,0 +1,72 @@ +[ + { + "EventName": "INST_BRANCH_MISPREDICT", + "EventCode": "0x00000006", + "BriefDescription": "Mispredicted branch instructions" + }, + { + "EventName": "INST_BRANCH", + "EventCode": "0x00000007", + "BriefDescription": "Retired branch instructions" + }, + { + "EventName": "INST_JMP_MISPREDICT", + "EventCode": "0x00000008", + "BriefDescription": "Indirect branch mispredict" + }, + { + "EventName": "INST_JMP", + "EventCode": "0x00000009", + "BriefDescription": "Retired jmp instructions" + }, + { + "EventName": "INST_STORE", + "EventCode": "0x0000000b", + "BriefDescription": "Retired store instructions" + }, + { + "EventName": "INST_ALU", + "EventCode": "0x0000001d", + "BriefDescription": "Retired ALU instructions" + }, + { + "EventName": "INST_LDST", + "EventCode": "0x0000001e", + "BriefDescription": "Retired Load/Store instructions" + }, + { + "EventName": "INST_VECTOR", + "EventCode": "0x0000001f", + "BriefDescription": "Retired Vector instructions" + }, + { + "EventName": "INST_CSR", + "EventCode": "0x00000020", + "BriefDescription": "Retired CSR instructions" + }, + { + "EventName": "INST_SYNC", + "EventCode": "0x00000021", + "BriefDescription": "Retired sync instructions (AMO/LR/SC instructions)" + }, + { + "EventName": "INST_UNALIGNED_ACCESS", + "EventCode": "0x00000022", + "BriefDescription": "Retired Store/Load instructions with 
unaligned memory access" + }, + { + "EventName": "INST_ECALL", + "EventCode": "0x00000025", + "BriefDescription": "Retired ecall instructions" + }, + { + "EventName": "INST_LONG_JP", + "EventCode": "0x00000026", + "BriefDescription": "Retired long jump instructions" + }, + { + "EventName": "INST_FP", + "EventCode": "0x0000002a", + "BriefDescription": "Retired FPU instructions" + } +] diff --git a/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/microarch.json b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/microarch.json new file mode 100644 index 0000000000000000000000000000000000000000..0ab6f288af91dc8a02ebcc07964cbf017feb4694 --- /dev/null +++ b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/microarch.json @@ -0,0 +1,80 @@ +[ + { + "EventName": "LSU_SPEC_FAIL", + "EventCode": "0x0000000a", + "BriefDescription": "LSU speculation fail" + }, + { + "EventName": "IDU_RF_PIPE_FAIL", + "EventCode": "0x00000014", + "BriefDescription": "Instruction decode unit launch pipeline failed in RF state" + }, + { + "EventName": "IDU_RF_REG_FAIL", + "EventCode": "0x00000015", + "BriefDescription": "Instruction decode unit launch register file fail in RF state" + }, + { + "EventName": "IDU_RF_INSTRUCTION", + "EventCode": "0x00000016", + "BriefDescription": "retired instruction count of Instruction decode unit in RF (Register File) stage" + }, + { + "EventName": "LSU_4K_STALL", + "EventCode": "0x00000017", + "BriefDescription": "LSU stall times for long distance data access (Over 4K)", + "PublicDescription": "This stall occurs when translating a virtual address with a page offset over 4k" + }, + { + "EventName": "LSU_OTHER_STALL", + "EventCode": "0x00000018", + "BriefDescription": "LSU stall times for other reasons (except the 4k stall)" + }, + { + "EventName": "LSU_SQ_OTHER_DIS", + "EventCode": "0x00000019", + "BriefDescription": "LSU store queue discard others" + }, + { + "EventName": "LSU_SQ_DATA_DISCARD", + "EventCode": "0x0000001a", + "BriefDescription": "LSU store queue
discard data (uops)" + }, + { + "EventName": "BRANCH_DIRECTION_MISPREDICTION", + "EventCode": "0x0000001b", + "BriefDescription": "Branch misprediction in BTB" + }, + { + "EventName": "BRANCH_DIRECTION_PREDICTION", + "EventCode": "0x0000001c", + "BriefDescription": "All branch predictions in BTB", + "PublicDescription": "This event includes both successful and failed predictions in BTB" + }, + { + "EventName": "INTERRUPT_ACK_COUNT", + "EventCode": "0x00000023", + "BriefDescription": "acknowledged interrupt count" + }, + { + "EventName": "INTERRUPT_OFF_CYCLE", + "EventCode": "0x00000024", + "BriefDescription": "PLIC arbitration time when the interrupt is not responded to", + "PublicDescription": "The arbitration time is recorded while meeting any of the following:\n- CPU is M-mode and MIE == 0\n- CPU is S-mode and delegation and SIE == 0\n" + }, + { + "EventName": "IFU_STALLED_CYCLE", + "EventCode": "0x00000027", + "BriefDescription": "Number of stall cycles of the instruction fetch unit (IFU)." + }, + { + "EventName": "IDU_STALLED_CYCLE", + "EventCode": "0x00000028", + "BriefDescription": "hpcp_backend_stall Number of stall cycles of the instruction decoding unit (IDU) and next-level pipeline unit."
+ }, + { + "EventName": "SYNC_STALL", + "EventCode": "0x00000029", + "BriefDescription": "Sync instruction stall cycle fence/fence.i/sync/sfence" + } +] diff --git a/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json b/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json index 3388b58b8f1a687d9afb622c6e732c7c532612f0..35124a4ddcb2bd547d190b40cdbb2c81fd5f5841 100644 --- a/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json +++ b/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json @@ -69,12 +69,6 @@ "MetricName": "C9_Pkg_Residency", "ScaleUnit": "100%" }, - { - "BriefDescription": "Uncore frequency per die [GHZ]", - "MetricExpr": "tma_info_system_socket_clks / #num_dies / duration_time / 1e9", - "MetricGroup": "SoC", - "MetricName": "UNCORE_FREQ" - }, { "BriefDescription": "Percentage of cycles spent in System Management Interrupts.", "MetricExpr": "((msr@aperf@ - cycles) / msr@aperf@ if msr@smi@ > 0 else 0)", @@ -809,6 +803,13 @@ "ScaleUnit": "100%", "Unit": "cpu_atom" }, + { + "BriefDescription": "Uncore frequency per die [GHZ]", + "MetricExpr": "tma_info_system_socket_clks / #num_dies / duration_time / 1e9", + "MetricGroup": "SoC", + "MetricName": "UNCORE_FREQ", + "Unit": "cpu_core" + }, { "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution ports for ALU operations.", "MetricExpr": "(cpu_core@UOPS_DISPATCHED.PORT_0@ + cpu_core@UOPS_DISPATCHED.PORT_1@ + cpu_core@UOPS_DISPATCHED.PORT_5_11@ + cpu_core@UOPS_DISPATCHED.PORT_6@) / (5 * tma_info_core_core_clks)", @@ -1838,7 +1839,7 @@ }, { "BriefDescription": "Average number of parallel data read requests to external memory", - "MetricExpr": "UNC_ARB_DAT_OCCUPANCY.RD / cpu_core@UNC_ARB_DAT_OCCUPANCY.RD\\,cmask\\=1@", + "MetricExpr": "UNC_ARB_DAT_OCCUPANCY.RD / UNC_ARB_DAT_OCCUPANCY.RD@cmask\\=1@", "MetricGroup": "Mem;MemoryBW;SoC", "MetricName": "tma_info_system_mem_parallel_reads", "PublicDescription": "Average number of parallel data read requests 
to external memory. Accounts for demand loads and L1/L2 prefetches", diff --git a/tools/perf/pmu-events/arch/x86/amdzen4/memory-controller.json b/tools/perf/pmu-events/arch/x86/amdzen4/memory-controller.json new file mode 100644 index 0000000000000000000000000000000000000000..55263e5e4f69a783d14666131b1169c8ec123027 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen4/memory-controller.json @@ -0,0 +1,101 @@ +[ + { + "EventName": "umc_mem_clk", + "PublicDescription": "Number of memory clock cycles.", + "EventCode": "0x00", + "PerPkg": "1", + "Unit": "UMCPMC" + }, + { + "EventName": "umc_act_cmd.all", + "PublicDescription": "Number of ACTIVATE commands sent.", + "EventCode": "0x05", + "PerPkg": "1", + "Unit": "UMCPMC" + }, + { + "EventName": "umc_act_cmd.rd", + "PublicDescription": "Number of ACTIVATE commands sent for reads.", + "EventCode": "0x05", + "RdWrMask": "0x1", + "PerPkg": "1", + "Unit": "UMCPMC" + }, + { + "EventName": "umc_act_cmd.wr", + "PublicDescription": "Number of ACTIVATE commands sent for writes.", + "EventCode": "0x05", + "RdWrMask": "0x2", + "PerPkg": "1", + "Unit": "UMCPMC" + }, + { + "EventName": "umc_pchg_cmd.all", + "PublicDescription": "Number of PRECHARGE commands sent.", + "EventCode": "0x06", + "PerPkg": "1", + "Unit": "UMCPMC" + }, + { + "EventName": "umc_pchg_cmd.rd", + "PublicDescription": "Number of PRECHARGE commands sent for reads.", + "EventCode": "0x06", + "RdWrMask": "0x1", + "PerPkg": "1", + "Unit": "UMCPMC" + }, + { + "EventName": "umc_pchg_cmd.wr", + "PublicDescription": "Number of PRECHARGE commands sent for writes.", + "EventCode": "0x06", + "RdWrMask": "0x2", + "PerPkg": "1", + "Unit": "UMCPMC" + }, + { + "EventName": "umc_cas_cmd.all", + "PublicDescription": "Number of CAS commands sent.", + "EventCode": "0x0a", + "PerPkg": "1", + "Unit": "UMCPMC" + }, + { + "EventName": "umc_cas_cmd.rd", + "PublicDescription": "Number of CAS commands sent for reads.", + "EventCode": "0x0a", + "RdWrMask": "0x1", + "PerPkg": "1", + 
"Unit": "UMCPMC" + }, + { + "EventName": "umc_cas_cmd.wr", + "PublicDescription": "Number of CAS commands sent for writes.", + "EventCode": "0x0a", + "RdWrMask": "0x2", + "PerPkg": "1", + "Unit": "UMCPMC" + }, + { + "EventName": "umc_data_slot_clks.all", + "PublicDescription": "Number of clocks used by the data bus.", + "EventCode": "0x14", + "PerPkg": "1", + "Unit": "UMCPMC" + }, + { + "EventName": "umc_data_slot_clks.rd", + "PublicDescription": "Number of clocks used by the data bus for reads.", + "EventCode": "0x14", + "RdWrMask": "0x1", + "PerPkg": "1", + "Unit": "UMCPMC" + }, + { + "EventName": "umc_data_slot_clks.wr", + "PublicDescription": "Number of clocks used by the data bus for writes.", + "EventCode": "0x14", + "RdWrMask": "0x2", + "PerPkg": "1", + "Unit": "UMCPMC" + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen4/recommended.json b/tools/perf/pmu-events/arch/x86/amdzen4/recommended.json index 5e6a793acf7b2a8e8785cbb17bcb8baedcd92c4d..96e06401c6cbbe22992266cc3306e3f10dcc6262 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen4/recommended.json +++ b/tools/perf/pmu-events/arch/x86/amdzen4/recommended.json @@ -330,5 +330,89 @@ "MetricGroup": "data_fabric", "PerPkg": "1", "ScaleUnit": "6.103515625e-5MiB" + }, + { + "MetricName": "umc_data_bus_utilization", + "BriefDescription": "Memory controller data bus utilization.", + "MetricExpr": "d_ratio(umc_data_slot_clks.all / 2, umc_mem_clk)", + "MetricGroup": "memory_controller", + "PerPkg": "1", + "ScaleUnit": "100%" + }, + { + "MetricName": "umc_cas_cmd_rate", + "BriefDescription": "Memory controller CAS command rate.", + "MetricExpr": "d_ratio(umc_cas_cmd.all * 1000, umc_mem_clk)", + "MetricGroup": "memory_controller", + "PerPkg": "1" + }, + { + "MetricName": "umc_cas_cmd_read_ratio", + "BriefDescription": "Ratio of memory controller CAS commands for reads.", + "MetricExpr": "d_ratio(umc_cas_cmd.rd, umc_cas_cmd.all)", + "MetricGroup": "memory_controller", + "PerPkg": "1", + "ScaleUnit": "100%" + }, + 
{ + "MetricName": "umc_cas_cmd_write_ratio", + "BriefDescription": "Ratio of memory controller CAS commands for writes.", + "MetricExpr": "d_ratio(umc_cas_cmd.wr, umc_cas_cmd.all)", + "MetricGroup": "memory_controller", + "PerPkg": "1", + "ScaleUnit": "100%" + }, + { + "MetricName": "umc_mem_read_bandwidth", + "BriefDescription": "Estimated memory read bandwidth.", + "MetricExpr": "(umc_cas_cmd.rd * 64) / 1e6 / duration_time", + "MetricGroup": "memory_controller", + "PerPkg": "1", + "ScaleUnit": "1MB/s" + }, + { + "MetricName": "umc_mem_write_bandwidth", + "BriefDescription": "Estimated memory write bandwidth.", + "MetricExpr": "(umc_cas_cmd.wr * 64) / 1e6 / duration_time", + "MetricGroup": "memory_controller", + "PerPkg": "1", + "ScaleUnit": "1MB/s" + }, + { + "MetricName": "umc_mem_bandwidth", + "BriefDescription": "Estimated combined memory bandwidth.", + "MetricExpr": "(umc_cas_cmd.all * 64) / 1e6 / duration_time", + "MetricGroup": "memory_controller", + "PerPkg": "1", + "ScaleUnit": "1MB/s" + }, + { + "MetricName": "umc_cas_cmd_read_ratio", + "BriefDescription": "Ratio of memory controller CAS commands for reads.", + "MetricExpr": "d_ratio(umc_cas_cmd.rd, umc_cas_cmd.all)", + "MetricGroup": "memory_controller", + "PerPkg": "1", + "ScaleUnit": "100%" + }, + { + "MetricName": "umc_cas_cmd_rate", + "BriefDescription": "Memory controller CAS command rate.", + "MetricExpr": "d_ratio(umc_cas_cmd.all * 1000, umc_mem_clk)", + "MetricGroup": "memory_controller", + "PerPkg": "1" + }, + { + "MetricName": "umc_activate_cmd_rate", + "BriefDescription": "Memory controller ACTIVATE command rate.", + "MetricExpr": "d_ratio(umc_act_cmd.all * 1000, umc_mem_clk)", + "MetricGroup": "memory_controller", + "PerPkg": "1" + }, + { + "MetricName": "umc_precharge_cmd_rate", + "BriefDescription": "Memory controller PRECHARGE command rate.", + "MetricExpr": "d_ratio(umc_pchg_cmd.all * 1000, umc_mem_clk)", + "MetricGroup": "memory_controller", + "PerPkg": "1" } ] diff --git 
a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json index 84c132af3dfa5717c6477f2cfe53c60ef0466a1d..8bc6c07078566e0f2baa317041fa45dc95c0c60f 100644 --- a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json @@ -1862,6 +1862,12 @@ "MetricName": "uncore_frequency", "ScaleUnit": "1GHz" }, + { + "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data receive bandwidth (MB/sec)", + "MetricExpr": "UNC_UPI_RxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time", + "MetricName": "upi_data_receive_bw", + "ScaleUnit": "1MB/s" + }, { "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data transmit bandwidth (MB/sec)", "MetricExpr": "UNC_UPI_TxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time", diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/floating-point.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/floating-point.json index 4a9d211e9d4f11cbb7fffacc91139b7797fbe2b7..1bdefaf96287777b1ca7ec8cc3fed35cd8a0418f 100644 --- a/tools/perf/pmu-events/arch/x86/emeraldrapids/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/floating-point.json @@ -23,26 +23,47 @@ "UMask": "0x10" }, { - "BriefDescription": "FP_ARITH_DISPATCHED.PORT_0", + "BriefDescription": "FP_ARITH_DISPATCHED.PORT_0 [This event is alias to FP_ARITH_DISPATCHED.V0]", "EventCode": "0xb3", "EventName": "FP_ARITH_DISPATCHED.PORT_0", "SampleAfterValue": "2000003", "UMask": "0x1" }, { - "BriefDescription": "FP_ARITH_DISPATCHED.PORT_1", + "BriefDescription": "FP_ARITH_DISPATCHED.PORT_1 [This event is alias to FP_ARITH_DISPATCHED.V1]", "EventCode": "0xb3", "EventName": "FP_ARITH_DISPATCHED.PORT_1", "SampleAfterValue": "2000003", "UMask": "0x2" }, { - "BriefDescription": "FP_ARITH_DISPATCHED.PORT_5", + "BriefDescription": "FP_ARITH_DISPATCHED.PORT_5 [This event is alias to FP_ARITH_DISPATCHED.V2]", "EventCode": 
"0xb3", "EventName": "FP_ARITH_DISPATCHED.PORT_5", "SampleAfterValue": "2000003", "UMask": "0x4" }, + { + "BriefDescription": "FP_ARITH_DISPATCHED.V0 [This event is alias to FP_ARITH_DISPATCHED.PORT_0]", + "EventCode": "0xb3", + "EventName": "FP_ARITH_DISPATCHED.V0", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "FP_ARITH_DISPATCHED.V1 [This event is alias to FP_ARITH_DISPATCHED.PORT_1]", + "EventCode": "0xb3", + "EventName": "FP_ARITH_DISPATCHED.V1", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, + { + "BriefDescription": "FP_ARITH_DISPATCHED.V2 [This event is alias to FP_ARITH_DISPATCHED.PORT_5]", + "EventCode": "0xb3", + "EventName": "FP_ARITH_DISPATCHED.V2", + "SampleAfterValue": "2000003", + "UMask": "0x4" + }, { "BriefDescription": "Counts number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", "EventCode": "0xc7", diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/pipeline.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/pipeline.json index 6dcf3b763af4f96d73f00f27001e89c52e5cf895..1f8200fb89647626f9a91853b36428fcc3594ef6 100644 --- a/tools/perf/pmu-events/arch/x86/emeraldrapids/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/pipeline.json @@ -1,20 +1,4 @@ [ - { - "BriefDescription": "AMX retired arithmetic BF16 operations.", - "EventCode": "0xce", - "EventName": "AMX_OPS_RETIRED.BF16", - "PublicDescription": "Number of AMX-based retired arithmetic bfloat16 (BF16) floating-point operations. Counts TDPBF16PS FP instructions. 
SW to use operation multiplier of 4", - "SampleAfterValue": "1000003", - "UMask": "0x2" - }, - { - "BriefDescription": "AMX retired arithmetic integer 8-bit operations.", - "EventCode": "0xce", - "EventName": "AMX_OPS_RETIRED.INT8", - "PublicDescription": "Number of AMX-based retired arithmetic integer operations of 8-bit width source operands. Counts TDPB[SS,UU,US,SU]D instructions. SW should use operation multiplier of 8.", - "SampleAfterValue": "1000003", - "UMask": "0x1" - }, { "BriefDescription": "This event is deprecated. Refer to new event ARITH.DIV_ACTIVE", "CounterMask": "1", @@ -505,7 +489,7 @@ "UMask": "0x1" }, { - "BriefDescription": "INT_MISC.UNKNOWN_BRANCH_CYCLES", + "BriefDescription": "Bubble cycles of BAClear (Unknown Branch).", "EventCode": "0xad", "EventName": "INT_MISC.UNKNOWN_BRANCH_CYCLES", "MSRIndex": "0x3F7", diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-interconnect.json index 09d840c7da4c9c9a9314bcd9edcc00a4f008fbc8..65d088556bae8dc1c4fce01201da5c99a3213a99 100644 --- a/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-interconnect.json +++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-interconnect.json @@ -4825,11 +4825,11 @@ "Unit": "M3UPI" }, { - "BriefDescription": "Number of allocations into the CRS Egress used to queue up requests destined to the mesh (AD Bouncable)", + "BriefDescription": "Number of allocations into the CRS Egress used to queue up requests destined to the mesh (AD Bounceable)", "EventCode": "0x47", "EventName": "UNC_MDF_CRS_TxR_INSERTS.AD_BNC", "PerPkg": "1", - "PublicDescription": "AD Bouncable : Number of allocations into the CRS Egress", + "PublicDescription": "AD Bounceable : Number of allocations into the CRS Egress", "UMask": "0x1", "Unit": "MDF" }, @@ -4861,11 +4861,11 @@ "Unit": "MDF" }, { - "BriefDescription": "Number of allocations into the CRS Egress used to queue up requests destined to the mesh (BL 
Bouncable)", + "BriefDescription": "Number of allocations into the CRS Egress used to queue up requests destined to the mesh (BL Bounceable)", "EventCode": "0x47", "EventName": "UNC_MDF_CRS_TxR_INSERTS.BL_BNC", "PerPkg": "1", - "PublicDescription": "BL Bouncable : Number of allocations into the CRS Egress", + "PublicDescription": "BL Bounceable : Number of allocations into the CRS Egress", "UMask": "0x4", "Unit": "MDF" }, diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-io.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-io.json index 557080b74ee50dc2be924f11d3c5d9802481bd4e..0761980c34a04014cdbde96a98ed112c1a33bc2b 100644 --- a/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-io.json +++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-io.json @@ -1185,6 +1185,36 @@ "UMask": "0x70ff010", "Unit": "IIO" }, + { + "BriefDescription": ": IOTLB Hits to a 1G Page", + "EventCode": "0x40", + "EventName": "UNC_IIO_IOMMU0.1G_HITS", + "PerPkg": "1", + "PortMask": "0x0000", + "PublicDescription": ": IOTLB Hits to a 1G Page : Counts if a transaction to a 1G page, on its first lookup, hits the IOTLB.", + "UMask": "0x10", + "Unit": "IIO" + }, + { + "BriefDescription": ": IOTLB Hits to a 2M Page", + "EventCode": "0x40", + "EventName": "UNC_IIO_IOMMU0.2M_HITS", + "PerPkg": "1", + "PortMask": "0x0000", + "PublicDescription": ": IOTLB Hits to a 2M Page : Counts if a transaction to a 2M page, on its first lookup, hits the IOTLB.", + "UMask": "0x8", + "Unit": "IIO" + }, + { + "BriefDescription": ": IOTLB Hits to a 4K Page", + "EventCode": "0x40", + "EventName": "UNC_IIO_IOMMU0.4K_HITS", + "PerPkg": "1", + "PortMask": "0x0000", + "PublicDescription": ": IOTLB Hits to a 4K Page : Counts if a transaction to a 4K page, on its first lookup, hits the IOTLB.", + "UMask": "0x4", + "Unit": "IIO" + }, { "BriefDescription": ": Context cache hits", "EventCode": "0x40", diff --git a/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json 
b/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json index e98602c667072a1ac916e05f9f3e50ac5a3ed1f3..71d78a7841ea826073622118fb6e3fa44b1b89bc 100644 --- a/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json @@ -1846,6 +1846,12 @@ "MetricName": "uncore_frequency", "ScaleUnit": "1GHz" }, + { + "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data receive bandwidth (MB/sec)", + "MetricExpr": "UNC_UPI_RxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time", + "MetricName": "upi_data_receive_bw", + "ScaleUnit": "1MB/s" + }, { "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data transmit bandwidth (MB/sec)", "MetricExpr": "UNC_UPI_TxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time", diff --git a/tools/perf/pmu-events/arch/x86/icelakex/other.json b/tools/perf/pmu-events/arch/x86/icelakex/other.json index 63d5faf2fc43ee963eb1bc7dfebba1ecfa7653a6..11810daaf1503062a64b8fa207537296e2e7335a 100644 --- a/tools/perf/pmu-events/arch/x86/icelakex/other.json +++ b/tools/perf/pmu-events/arch/x86/icelakex/other.json @@ -19,7 +19,7 @@ "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX512 turbo schedule.", "EventCode": "0x28", "EventName": "CORE_POWER.LVL2_TURBO_LICENSE", - "PublicDescription": "Core cycles where the core was running with power-delivery for license level 2 (introduced in Skylake Server microarchtecture). This includes high current AVX 512-bit instructions.", + "PublicDescription": "Core cycles where the core was running with power-delivery for license level 2 (introduced in Skylake Server microarchitecture). 
This includes high current AVX 512-bit instructions.", "SampleAfterValue": "200003", "UMask": "0x20" }, diff --git a/tools/perf/pmu-events/arch/x86/icelakex/pipeline.json b/tools/perf/pmu-events/arch/x86/icelakex/pipeline.json index 176e5ef2a24af9e99c5119c2bff7d8ec195936ec..45ee6bceba7f1365a985522ee0f6ef1eadacad90 100644 --- a/tools/perf/pmu-events/arch/x86/icelakex/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/icelakex/pipeline.json @@ -519,7 +519,7 @@ "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread", "EventCode": "0x5e", "EventName": "RS_EVENTS.EMPTY_CYCLES", - "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for this logical processor. This is usually caused when the front-end pipeline runs into stravation periods (e.g. branch mispredictions or i-cache misses)", + "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for this logical processor. This is usually caused when the front-end pipeline runs into starvation periods (e.g. 
branch mispredictions or i-cache misses)", "SampleAfterValue": "1000003", "UMask": "0x1" }, diff --git a/tools/perf/pmu-events/arch/x86/icelakex/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/icelakex/uncore-interconnect.json index f87ea3f66d1becd35dcc6f92751d855fe19f0f66..a066a009c51178f7c835fb2d8c757f4772023eab 100644 --- a/tools/perf/pmu-events/arch/x86/icelakex/uncore-interconnect.json +++ b/tools/perf/pmu-events/arch/x86/icelakex/uncore-interconnect.json @@ -38,7 +38,7 @@ "EventCode": "0x10", "EventName": "UNC_I_COHERENT_OPS.CLFLUSH", "PerPkg": "1", - "PublicDescription": "Coherent Ops : CLFlush : Counts the number of coherency related operations servied by the IRP", + "PublicDescription": "Coherent Ops : CLFlush : Counts the number of coherency related operations serviced by the IRP", "UMask": "0x80", "Unit": "IRP" }, @@ -65,7 +65,7 @@ "EventCode": "0x10", "EventName": "UNC_I_COHERENT_OPS.WBMTOI", "PerPkg": "1", - "PublicDescription": "Coherent Ops : WbMtoI : Counts the number of coherency related operations servied by the IRP", + "PublicDescription": "Coherent Ops : WbMtoI : Counts the number of coherency related operations serviced by the IRP", "UMask": "0x40", "Unit": "IRP" }, @@ -454,7 +454,7 @@ "EventCode": "0x11", "EventName": "UNC_I_TRANSACTIONS.WRITES", "PerPkg": "1", - "PublicDescription": "Inbound Transaction Count : Writes : Counts the number of Inbound transactions from the IRP to the Uncore. This can be filtered based on request type in addition to the source queue. Note the special filtering equation. We do OR-reduction on the request type. If the SOURCE bit is set, then we also do AND qualification based on the source portID. : Trackes only write requests. Each write request should have a prefetch, so there is no need to explicitly track these requests. 
For writes that are tickled and have to retry, the counter will be incremented for each retry.", + "PublicDescription": "Inbound Transaction Count : Writes : Counts the number of Inbound transactions from the IRP to the Uncore. This can be filtered based on request type in addition to the source queue. Note the special filtering equation. We do OR-reduction on the request type. If the SOURCE bit is set, then we also do AND qualification based on the source portID. : Tracks only write requests. Each write request should have a prefetch, so there is no need to explicitly track these requests. For writes that are tickled and have to retry, the counter will be incremented for each retry.", "UMask": "0x2", "Unit": "IRP" }, diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index e571683f59f3d587637b5be1078801606673a5da..4d1deed4437ab24fb1fb060819cc40deb9fb7daa 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -7,7 +7,7 @@ GenuineIntel-6-56,v11,broadwellde,core GenuineIntel-6-4F,v22,broadwellx,core GenuineIntel-6-55-[56789ABCDEF],v1.20,cascadelakex,core GenuineIntel-6-9[6C],v1.04,elkhartlake,core -GenuineIntel-6-CF,v1.01,emeraldrapids,core +GenuineIntel-6-CF,v1.02,emeraldrapids,core GenuineIntel-6-5[CF],v13,goldmont,core GenuineIntel-6-7A,v1.01,goldmontplus,core GenuineIntel-6-B6,v1.00,grandridge,core @@ -15,7 +15,7 @@ GenuineIntel-6-A[DE],v1.01,graniterapids,core GenuineIntel-6-(3C|45|46),v33,haswell,core GenuineIntel-6-3F,v28,haswellx,core GenuineIntel-6-7[DE],v1.19,icelake,core -GenuineIntel-6-6[AC],v1.21,icelakex,core +GenuineIntel-6-6[AC],v1.23,icelakex,core GenuineIntel-6-3A,v24,ivybridge,core GenuineIntel-6-3E,v24,ivytown,core GenuineIntel-6-2D,v24,jaketown,core @@ -26,7 +26,7 @@ GenuineIntel-6-1[AEF],v4,nehalemep,core GenuineIntel-6-2E,v4,nehalemex,core GenuineIntel-6-A7,v1.01,rocketlake,core GenuineIntel-6-2A,v19,sandybridge,core 
-GenuineIntel-6-8F,v1.16,sapphirerapids,core +GenuineIntel-6-8F,v1.17,sapphirerapids,core GenuineIntel-6-AF,v1.00,sierraforest,core GenuineIntel-6-(37|4A|4C|4D|5A),v15,silvermont,core GenuineIntel-6-(4E|5E|8E|9E|A5|A6),v57,skylake,core diff --git a/tools/perf/pmu-events/arch/x86/rocketlake/rkl-metrics.json b/tools/perf/pmu-events/arch/x86/rocketlake/rkl-metrics.json index 0c880e41566995eb8cb2c5b3cafd05713161347c..27433fc15ede77b2de29677fdef4cdd4ce2b7a77 100644 --- a/tools/perf/pmu-events/arch/x86/rocketlake/rkl-metrics.json +++ b/tools/perf/pmu-events/arch/x86/rocketlake/rkl-metrics.json @@ -985,7 +985,7 @@ }, { "BriefDescription": "Average number of parallel data read requests to external memory", - "MetricExpr": "UNC_ARB_DAT_OCCUPANCY.RD / cpu@UNC_ARB_DAT_OCCUPANCY.RD\\,cmask\\=1@", + "MetricExpr": "UNC_ARB_DAT_OCCUPANCY.RD / UNC_ARB_DAT_OCCUPANCY.RD@cmask\\=1@", "MetricGroup": "Mem;MemoryBW;SoC", "MetricName": "tma_info_system_mem_parallel_reads", "PublicDescription": "Average number of parallel data read requests to external memory. 
Accounts for demand loads and L1/L2 prefetches" diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/floating-point.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/floating-point.json index 4a9d211e9d4f11cbb7fffacc91139b7797fbe2b7..1bdefaf96287777b1ca7ec8cc3fed35cd8a0418f 100644 --- a/tools/perf/pmu-events/arch/x86/sapphirerapids/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/floating-point.json @@ -23,26 +23,47 @@ "UMask": "0x10" }, { - "BriefDescription": "FP_ARITH_DISPATCHED.PORT_0", + "BriefDescription": "FP_ARITH_DISPATCHED.PORT_0 [This event is alias to FP_ARITH_DISPATCHED.V0]", "EventCode": "0xb3", "EventName": "FP_ARITH_DISPATCHED.PORT_0", "SampleAfterValue": "2000003", "UMask": "0x1" }, { - "BriefDescription": "FP_ARITH_DISPATCHED.PORT_1", + "BriefDescription": "FP_ARITH_DISPATCHED.PORT_1 [This event is alias to FP_ARITH_DISPATCHED.V1]", "EventCode": "0xb3", "EventName": "FP_ARITH_DISPATCHED.PORT_1", "SampleAfterValue": "2000003", "UMask": "0x2" }, { - "BriefDescription": "FP_ARITH_DISPATCHED.PORT_5", + "BriefDescription": "FP_ARITH_DISPATCHED.PORT_5 [This event is alias to FP_ARITH_DISPATCHED.V2]", "EventCode": "0xb3", "EventName": "FP_ARITH_DISPATCHED.PORT_5", "SampleAfterValue": "2000003", "UMask": "0x4" }, + { + "BriefDescription": "FP_ARITH_DISPATCHED.V0 [This event is alias to FP_ARITH_DISPATCHED.PORT_0]", + "EventCode": "0xb3", + "EventName": "FP_ARITH_DISPATCHED.V0", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "FP_ARITH_DISPATCHED.V1 [This event is alias to FP_ARITH_DISPATCHED.PORT_1]", + "EventCode": "0xb3", + "EventName": "FP_ARITH_DISPATCHED.V1", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, + { + "BriefDescription": "FP_ARITH_DISPATCHED.V2 [This event is alias to FP_ARITH_DISPATCHED.PORT_5]", + "EventCode": "0xb3", + "EventName": "FP_ARITH_DISPATCHED.V2", + "SampleAfterValue": "2000003", + "UMask": "0x4" + }, { "BriefDescription": "Counts number of SSE/AVX 
computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", "EventCode": "0xc7", diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json index 6dcf3b763af4f96d73f00f27001e89c52e5cf895..2cfe814d20151c301deeed6562dc5bcdf10b3f42 100644 --- a/tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json @@ -505,7 +505,7 @@ "UMask": "0x1" }, { - "BriefDescription": "INT_MISC.UNKNOWN_BRANCH_CYCLES", + "BriefDescription": "Bubble cycles of BAClear (Unknown Branch).", "EventCode": "0xad", "EventName": "INT_MISC.UNKNOWN_BRANCH_CYCLES", "MSRIndex": "0x3F7", diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json index 06c6d67cb76b073d7862899b180383ac8753503b..e31a4aac9f205e4d462b43472dbfc02c2ffd91c1 100644 --- a/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json +++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json @@ -1964,6 +1964,12 @@ "MetricName": "uncore_frequency", "ScaleUnit": "1GHz" }, + { + "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data receive bandwidth (MB/sec)", + "MetricExpr": "UNC_UPI_RxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time", + "MetricName": "upi_data_receive_bw", + "ScaleUnit": "1MB/s" + }, { "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data transmit bandwidth (MB/sec)", "MetricExpr": "UNC_UPI_TxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time", diff --git 
a/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-interconnect.json index 09d840c7da4c9c9a9314bcd9edcc00a4f008fbc8..65d088556bae8dc1c4fce01201da5c99a3213a99 100644 --- a/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-interconnect.json +++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-interconnect.json @@ -4825,11 +4825,11 @@ "Unit": "M3UPI" }, { - "BriefDescription": "Number of allocations into the CRS Egress used to queue up requests destined to the mesh (AD Bouncable)", + "BriefDescription": "Number of allocations into the CRS Egress used to queue up requests destined to the mesh (AD Bounceable)", "EventCode": "0x47", "EventName": "UNC_MDF_CRS_TxR_INSERTS.AD_BNC", "PerPkg": "1", - "PublicDescription": "AD Bouncable : Number of allocations into the CRS Egress", + "PublicDescription": "AD Bounceable : Number of allocations into the CRS Egress", "UMask": "0x1", "Unit": "MDF" }, @@ -4861,11 +4861,11 @@ "Unit": "MDF" }, { - "BriefDescription": "Number of allocations into the CRS Egress used to queue up requests destined to the mesh (BL Bouncable)", + "BriefDescription": "Number of allocations into the CRS Egress used to queue up requests destined to the mesh (BL Bounceable)", "EventCode": "0x47", "EventName": "UNC_MDF_CRS_TxR_INSERTS.BL_BNC", "PerPkg": "1", - "PublicDescription": "BL Bouncable : Number of allocations into the CRS Egress", + "PublicDescription": "BL Bounceable : Number of allocations into the CRS Egress", "UMask": "0x4", "Unit": "MDF" }, diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-io.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-io.json index 8b5f54fed10339640840d49c01cc7b76e935a9ee..03596db8771016b931673489b6abfc97bc1522a4 100644 --- a/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-io.json +++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-io.json @@ -1249,6 +1249,36 @@ "UMask": "0x70ff010", "Unit": "IIO" }, + 
{ + "BriefDescription": ": IOTLB Hits to a 1G Page", + "EventCode": "0x40", + "EventName": "UNC_IIO_IOMMU0.1G_HITS", + "PerPkg": "1", + "PortMask": "0x0000", + "PublicDescription": ": IOTLB Hits to a 1G Page : Counts if a transaction to a 1G page, on its first lookup, hits the IOTLB.", + "UMask": "0x10", + "Unit": "IIO" + }, + { + "BriefDescription": ": IOTLB Hits to a 2M Page", + "EventCode": "0x40", + "EventName": "UNC_IIO_IOMMU0.2M_HITS", + "PerPkg": "1", + "PortMask": "0x0000", + "PublicDescription": ": IOTLB Hits to a 2M Page : Counts if a transaction to a 2M page, on its first lookup, hits the IOTLB.", + "UMask": "0x8", + "Unit": "IIO" + }, + { + "BriefDescription": ": IOTLB Hits to a 4K Page", + "EventCode": "0x40", + "EventName": "UNC_IIO_IOMMU0.4K_HITS", + "PerPkg": "1", + "PortMask": "0x0000", + "PublicDescription": ": IOTLB Hits to a 4K Page : Counts if a transaction to a 4K page, on its first lookup, hits the IOTLB.", + "UMask": "0x4", + "Unit": "IIO" + }, { "BriefDescription": ": Context cache hits", "EventCode": "0x40", diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json index 4a8f8eeb7525594483fa050cd53262b3a8438c7a..ec3aa5ef00a3c79bdf8cae408a271d49754204be 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json @@ -1806,6 +1806,12 @@ "MetricName": "uncore_frequency", "ScaleUnit": "1GHz" }, + { + "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data receive bandwidth (MB/sec)", + "MetricExpr": "UNC_UPI_RxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time", + "MetricName": "upi_data_receive_bw", + "ScaleUnit": "1MB/s" + }, { "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data transmit bandwidth (MB/sec)", "MetricExpr": "UNC_UPI_TxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time", diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py 
index 3c091ab753059072973a37cc19a89e2ef0f0c8af..53ab050c8fa436f584867c707a91a1ae985aa567 100755 --- a/tools/perf/pmu-events/jevents.py +++ b/tools/perf/pmu-events/jevents.py @@ -83,7 +83,7 @@ def c_len(s: str) -> int: """Return the length of s a C string This doesn't handle all escape characters properly. It first assumes - all \ are for escaping, it then adjusts as it will have over counted + all \\ are for escaping, it then adjusts as it will have over counted \\. The code uses \000 rather than \0 as a terminator as an adjacent number would be folded into a string of \0 (ie. "\0" + "5" doesn't equal a terminator followed by the number 5 but the escape of @@ -286,6 +286,7 @@ class JsonEvent: 'imx8_ddr': 'imx8_ddr', 'L3PMC': 'amd_l3', 'DFPMC': 'amd_df', + 'UMCPMC': 'amd_umc', 'cpu_core': 'cpu_core', 'cpu_atom': 'cpu_atom', 'ali_drw': 'ali_drw', @@ -354,6 +355,7 @@ class JsonEvent: ('SampleAfterValue', 'period='), ('UMask', 'umask='), ('NodeType', 'type='), + ('RdWrMask', 'rdwrmask='), ] for key, value in event_fields: if key in jd and jd[key] != '0': diff --git a/tools/perf/scripts/python/arm-cs-trace-disasm.py b/tools/perf/scripts/python/arm-cs-trace-disasm.py index d59ff53f1d946c01e59038f353b5658658dee1e7..d973c2baed1c8559d2c769516a07542ecc993dc5 100755 --- a/tools/perf/scripts/python/arm-cs-trace-disasm.py +++ b/tools/perf/scripts/python/arm-cs-trace-disasm.py @@ -45,8 +45,8 @@ parser = OptionParser(option_list=option_list) # Initialize global dicts and regular expression disasm_cache = dict() cpu_data = dict() -disasm_re = re.compile("^\s*([0-9a-fA-F]+):") -disasm_func_re = re.compile("^\s*([0-9a-fA-F]+)\s.*:") +disasm_re = re.compile(r"^\s*([0-9a-fA-F]+):") +disasm_func_re = re.compile(r"^\s*([0-9a-fA-F]+)\s.*:") cache_size = 64*1024 glb_source_file_name = None @@ -188,6 +188,17 @@ def process_event(param_dict): dso_end = get_optional(param_dict, "dso_map_end") symbol = get_optional(param_dict, "symbol") + cpu = sample["cpu"] + ip = sample["ip"] + addr = 
sample["addr"] + + # Initialize CPU data if it's empty, and directly return back + # if this is the first tracing event for this CPU. + if (cpu_data.get(str(cpu) + 'addr') == None): + cpu_data[str(cpu) + 'addr'] = addr + return + + if (options.verbose == True): print("Event type: %s" % name) print_sample(sample) @@ -209,16 +220,6 @@ def process_event(param_dict): if (name[0:8] != "branches"): return - cpu = sample["cpu"] - ip = sample["ip"] - addr = sample["addr"] - - # Initialize CPU data if it's empty, and directly return back - # if this is the first tracing event for this CPU. - if (cpu_data.get(str(cpu) + 'addr') == None): - cpu_data[str(cpu) + 'addr'] = addr - return - # The format for packet is: # # +------------+------------+------------+ @@ -258,8 +259,9 @@ def process_event(param_dict): if (options.objdump_name != None): # It doesn't need to decrease virtual memory offset for disassembly - # for kernel dso, so in this case we set vm_start to zero. - if (dso == "[kernel.kallsyms]"): + # for kernel dso and executable file dso, so in this case we set + # vm_start to zero. 
+ if (dso == "[kernel.kallsyms]" or dso_start == 0x400000): dso_vm_start = 0 else: dso_vm_start = int(dso_start) diff --git a/tools/perf/scripts/python/compaction-times.py b/tools/perf/scripts/python/compaction-times.py index 2560a042dc6fa4e9c0cd80dd8b5f068117be9082..9401f7c14747788f7f1e4617fb22354819785a46 100644 --- a/tools/perf/scripts/python/compaction-times.py +++ b/tools/perf/scripts/python/compaction-times.py @@ -260,7 +260,7 @@ def pr_help(): comm_re = None pid_re = None -pid_regex = "^(\d*)-(\d*)$|^(\d*)$" +pid_regex = r"^(\d*)-(\d*)$|^(\d*)$" opt_proc = popt.DISP_DFL opt_disp = topt.DISP_ALL diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index 13f2d8a8161096e8f8f9b691146629dafeee4538..121cf61ba1b345f579e2db83f1dfb586e8d88ec7 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -677,8 +677,8 @@ class CallGraphModelBase(TreeModel): # sqlite supports GLOB (text only) which uses * and ? and is case sensitive if not self.glb.dbref.is_sqlite3: # Escape % and _ - s = value.replace("%", "\%") - s = s.replace("_", "\_") + s = value.replace("%", "\\%") + s = s.replace("_", "\\_") # Translate * and ? 
into SQL LIKE pattern characters % and _ trans = string.maketrans("*?", "%_") match = " LIKE '" + str(s).translate(trans) + "'" diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 2b45ffa462a6c4b9b29629dde2fa8262620bbe61..53ba9c3e20e05782eb47e7368795b5869f263be9 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -77,3 +77,17 @@ CFLAGS_python-use.o += -DPYTHONPATH="BUILD_STR($(OUTPUT)python)" -DPYTHON="BUI CFLAGS_dwarf-unwind.o += -fno-optimize-sibling-calls perf-y += workloads/ + +ifdef SHELLCHECK + SHELL_TESTS := $(shell find tests/shell -executable -type f -name '*.sh') + TEST_LOGS := $(SHELL_TESTS:tests/shell/%=shell/%.shellcheck_log) +else + SHELL_TESTS := + TEST_LOGS := +endif + +$(OUTPUT)%.shellcheck_log: % + $(call rule_mkdir) + $(Q)$(call echo-cmd,test)shellcheck -a -S warning "$<" > $@ || (cat $@ && rm $@ && false) + +perf-y += $(TEST_LOGS) diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c index 61186d0d1cfa1afda6d99260db51d5603e732a22..97e1bdd6ec0e9fc3ceafc96ac6df071c034cd314 100644 --- a/tools/perf/tests/attr.c +++ b/tools/perf/tests/attr.c @@ -188,7 +188,7 @@ static int test__attr(struct test_suite *test __maybe_unused, int subtest __mayb if (perf_pmus__num_core_pmus() > 1) { /* * TODO: Attribute tests hard code the PMU type. If there are >1 - * core PMU then each PMU will have a different type whic + * core PMU then each PMU will have a different type which * requires additional support. 
*/ pr_debug("Skip test on hybrid systems"); diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/attr/base-record index 27c21271a16c997ab8a606d347a96967bec9919d..b44e4e6e444386af80ad5027513cd5a2fe4986de 100644 --- a/tools/perf/tests/attr/base-record +++ b/tools/perf/tests/attr/base-record @@ -6,7 +6,7 @@ flags=0|8 cpu=* type=0|1 size=136 -config=0 +config=0|1 sample_period=* sample_type=263 read_format=0|4|20 diff --git a/tools/perf/tests/attr/test-record-user-regs-no-sve-aarch64 b/tools/perf/tests/attr/test-record-user-regs-no-sve-aarch64 index fbb065842880f3461bdb91571fc371a3ca10fa7d..bed765450ca976f8b13dad231eae2e2743840acc 100644 --- a/tools/perf/tests/attr/test-record-user-regs-no-sve-aarch64 +++ b/tools/perf/tests/attr/test-record-user-regs-no-sve-aarch64 @@ -6,4 +6,4 @@ args = --no-bpf-event --user-regs=vg kill >/dev/null 2>&1 ret = 129 test_ret = true arch = aarch64 -auxv = auxv["AT_HWCAP"] & 0x200000 == 0 +auxv = auxv["AT_HWCAP"] & 0x400000 == 0 diff --git a/tools/perf/tests/attr/test-record-user-regs-sve-aarch64 b/tools/perf/tests/attr/test-record-user-regs-sve-aarch64 index c598c803221da79fe573a308ed27ce3fa848202e..a65113cd7311b4a8faacfbe2ef54567b00163748 100644 --- a/tools/perf/tests/attr/test-record-user-regs-sve-aarch64 +++ b/tools/perf/tests/attr/test-record-user-regs-sve-aarch64 @@ -6,7 +6,7 @@ args = --no-bpf-event --user-regs=vg kill >/dev/null 2>&1 ret = 1 test_ret = true arch = aarch64 -auxv = auxv["AT_HWCAP"] & 0x200000 == 0x200000 +auxv = auxv["AT_HWCAP"] & 0x400000 == 0x400000 kernel_since = 6.1 [event:base-record] diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index cb6f1dd00dc483a495fdb465067aa33ae6a6be1e..4a5973f9bb9b370f1bc966f04e1efdd7b03ef64d 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -14,6 +14,7 @@ #include #include #include "builtin.h" +#include "config.h" #include "hist.h" #include "intlist.h" #include "tests.h" @@ -32,6 +33,7 @@ static bool 
dont_fork; const char *dso_to_test; +const char *test_objdump_path = "objdump"; /* * List of architecture specific tests. Not a weak symbol as the array length is @@ -60,8 +62,6 @@ static struct test_suite *generic_tests[] = { &suite__pmu, &suite__pmu_events, &suite__dso_data, - &suite__dso_data_cache, - &suite__dso_data_reopen, &suite__perf_evsel__roundtrip_name_test, #ifdef HAVE_LIBTRACEEVENT &suite__perf_evsel__tp_sched_test, @@ -513,6 +513,15 @@ static int run_workload(const char *work, int argc, const char **argv) return -1; } +static int perf_test__config(const char *var, const char *value, + void *data __maybe_unused) +{ + if (!strcmp(var, "annotate.objdump")) + test_objdump_path = value; + + return 0; +} + int cmd_test(int argc, const char **argv) { const char *test_usage[] = { @@ -529,6 +538,8 @@ int cmd_test(int argc, const char **argv) "Do not fork for testcase"), OPT_STRING('w', "workload", &workload, "work", "workload to run for testing"), OPT_STRING(0, "dso", &dso_to_test, "dso", "dso to test"), + OPT_STRING(0, "objdump", &test_objdump_path, "path", + "objdump binary to use for disassembly and annotations"), OPT_END() }; const char * const test_subcommands[] = { "list", NULL }; @@ -538,6 +549,8 @@ int cmd_test(int argc, const char **argv) if (ret < 0) return ret; + perf_config(perf_test__config, NULL); + /* Unbuffered output */ setvbuf(stdout, NULL, _IONBF, 0); diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 3af81012014edb8455965d5ee78d834416d5b4da..7a3a7bbbec7146b772cd6ab029b1d22c9d94a873 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -185,7 +185,7 @@ static int read_via_objdump(const char *filename, u64 addr, void *buf, int ret; fmt = "%s -z -d --start-address=0x%"PRIx64" --stop-address=0x%"PRIx64" %s"; - ret = snprintf(cmd, sizeof(cmd), fmt, "objdump", addr, addr + len, + ret = snprintf(cmd, sizeof(cmd), fmt, test_objdump_path, addr, addr + len, filename); if (ret <= 0 || 
(size_t)ret >= sizeof(cmd)) return -1; @@ -511,38 +511,6 @@ static void fs_something(void) } } -#ifdef __s390x__ -#include "header.h" // for get_cpuid() -#endif - -static const char *do_determine_event(bool excl_kernel) -{ - const char *event = excl_kernel ? "cycles:u" : "cycles"; - -#ifdef __s390x__ - char cpuid[128], model[16], model_c[16], cpum_cf_v[16]; - unsigned int family; - int ret, cpum_cf_a; - - if (get_cpuid(cpuid, sizeof(cpuid))) - goto out_clocks; - ret = sscanf(cpuid, "%*[^,],%u,%[^,],%[^,],%[^,],%x", &family, model_c, - model, cpum_cf_v, &cpum_cf_a); - if (ret != 5) /* Not available */ - goto out_clocks; - if (excl_kernel && (cpum_cf_a & 4)) - return event; - if (!excl_kernel && (cpum_cf_a & 2)) - return event; - - /* Fall through: missing authorization */ -out_clocks: - event = excl_kernel ? "cpu-clock:u" : "cpu-clock"; - -#endif - return event; -} - static void do_something(void) { fs_something(); @@ -583,8 +551,10 @@ static int do_test_code_reading(bool try_kcore) int err = -1, ret; pid_t pid; struct map *map; - bool have_vmlinux, have_kcore, excl_kernel = false; + bool have_vmlinux, have_kcore; struct dso *dso; + const char *events[] = { "cycles", "cycles:u", "cpu-clock", "cpu-clock:u", NULL }; + int evidx = 0; pid = getpid(); @@ -618,7 +588,7 @@ static int do_test_code_reading(bool try_kcore) /* No point getting kernel events if there is no kernel object */ if (!have_vmlinux && !have_kcore) - excl_kernel = true; + evidx++; threads = thread_map__new_by_tid(pid); if (!threads) { @@ -640,13 +610,13 @@ static int do_test_code_reading(bool try_kcore) goto out_put; } - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); if (!cpus) { pr_debug("perf_cpu_map__new failed\n"); goto out_put; } - while (1) { + while (events[evidx]) { const char *str; evlist = evlist__new(); @@ -657,7 +627,7 @@ static int do_test_code_reading(bool try_kcore) perf_evlist__set_maps(&evlist->core, cpus, threads); - str = do_determine_event(excl_kernel); + 
str = events[evidx]; pr_debug("Parsing event '%s'\n", str); ret = parse_event(evlist, str); if (ret < 0) { @@ -675,32 +645,32 @@ static int do_test_code_reading(bool try_kcore) ret = evlist__open(evlist); if (ret < 0) { - if (!excl_kernel) { - excl_kernel = true; - /* - * Both cpus and threads are now owned by evlist - * and will be freed by following perf_evlist__set_maps - * call. Getting reference to keep them alive. - */ - perf_cpu_map__get(cpus); - perf_thread_map__get(threads); - perf_evlist__set_maps(&evlist->core, NULL, NULL); - evlist__delete(evlist); - evlist = NULL; - continue; - } + evidx++; - if (verbose > 0) { + if (events[evidx] == NULL && verbose > 0) { char errbuf[512]; evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf)); pr_debug("perf_evlist__open() failed!\n%s\n", errbuf); } - goto out_put; + /* + * Both cpus and threads are now owned by evlist + * and will be freed by following perf_evlist__set_maps + * call. Getting reference to keep them alive. + */ + perf_cpu_map__get(cpus); + perf_thread_map__get(threads); + perf_evlist__set_maps(&evlist->core, NULL, NULL); + evlist__delete(evlist); + evlist = NULL; + continue; } break; } + if (events[evidx] == NULL) + goto out_put; + ret = evlist__mmap(evlist, UINT_MAX); if (ret < 0) { pr_debug("evlist__mmap failed\n"); @@ -721,7 +691,7 @@ static int do_test_code_reading(bool try_kcore) err = TEST_CODE_READING_NO_KERNEL_OBJ; else if (!have_vmlinux && !try_kcore) err = TEST_CODE_READING_NO_VMLINUX; - else if (excl_kernel) + else if (strstr(events[evidx], ":u")) err = TEST_CODE_READING_NO_ACCESS; else err = TEST_CODE_READING_OK; diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c index 7730fc2ab40b734274fe89b569def4f9909e0e31..bd8e396f3e57bbb27d46567677a20273877492d6 100644 --- a/tools/perf/tests/cpumap.c +++ b/tools/perf/tests/cpumap.c @@ -213,7 +213,7 @@ static int test__cpu_map_intersect(struct test_suite *test __maybe_unused, static int test__cpu_map_equal(struct test_suite *test 
__maybe_unused, int subtest __maybe_unused) { - struct perf_cpu_map *any = perf_cpu_map__dummy_new(); + struct perf_cpu_map *any = perf_cpu_map__new_any_cpu(); struct perf_cpu_map *one = perf_cpu_map__new("1"); struct perf_cpu_map *two = perf_cpu_map__new("2"); struct perf_cpu_map *empty = perf_cpu_map__intersect(one, two); diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c index 3419a4ab5590f5fff2ae85334f8941b0c159b2e3..2d67422c1222949700e7759fd174080ea439765a 100644 --- a/tools/perf/tests/dso-data.c +++ b/tools/perf/tests/dso-data.c @@ -394,6 +394,15 @@ static int test__dso_data_reopen(struct test_suite *test __maybe_unused, int sub return 0; } -DEFINE_SUITE("DSO data read", dso_data); -DEFINE_SUITE("DSO data cache", dso_data_cache); -DEFINE_SUITE("DSO data reopen", dso_data_reopen); + +static struct test_case tests__dso_data[] = { + TEST_CASE("read", dso_data), + TEST_CASE("cache", dso_data_cache), + TEST_CASE("reopen", dso_data_reopen), + { .name = NULL, } +}; + +struct test_suite suite__dso_data = { + .desc = "DSO data tests", + .test_cases = tests__dso_data, +}; diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c index 8f4f9b632e1e586a85911d61e4e180294041fff3..5a3b2bed07f327e1806766a5bdf0c61792673c75 100644 --- a/tools/perf/tests/keep-tracking.c +++ b/tools/perf/tests/keep-tracking.c @@ -81,7 +81,7 @@ static int test__keep_tracking(struct test_suite *test __maybe_unused, int subte threads = thread_map__new(-1, getpid(), UINT_MAX); CHECK_NOT_NULL__(threads); - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); CHECK_NOT_NULL__(cpus); evlist = evlist__new(); diff --git a/tools/perf/tests/make b/tools/perf/tests/make index d9945ed25bc5ae96c765c152b4761bdf4a622c78..8a4da7eb637a8abd38f047238c6433e9929f9a2d 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -183,7 +183,7 @@ run += make_install_prefix_slash # run += make_install_pdf run += make_minimal -old_libbpf := $(shell echo 
'\#include ' | $(CC) -E -dM -x c -| egrep -q "define[[:space:]]+LIBBPF_MAJOR_VERSION[[:space:]]+0{1}") +old_libbpf := $(shell echo '\#include ' | $(CC) -E -dM -x c -| grep -q -E "define[[:space:]]+LIBBPF_MAJOR_VERSION[[:space:]]+0{1}") ifneq ($(old_libbpf),) run += make_libbpf_dynamic diff --git a/tools/perf/tests/maps.c b/tools/perf/tests/maps.c index 5bb1123a91a7ccf0f38a021581bcd194c36bec1c..bb3fbfe5a73e2302155fe40a953102e15f0dd103 100644 --- a/tools/perf/tests/maps.c +++ b/tools/perf/tests/maps.c @@ -14,44 +14,59 @@ struct map_def { u64 end; }; +struct check_maps_cb_args { + struct map_def *merged; + unsigned int i; +}; + +static int check_maps_cb(struct map *map, void *data) +{ + struct check_maps_cb_args *args = data; + struct map_def *merged = &args->merged[args->i]; + + if (map__start(map) != merged->start || + map__end(map) != merged->end || + strcmp(map__dso(map)->name, merged->name) || + refcount_read(map__refcnt(map)) != 1) { + return 1; + } + args->i++; + return 0; +} + +static int failed_cb(struct map *map, void *data __maybe_unused) +{ + pr_debug("\tstart: %" PRIu64 " end: %" PRIu64 " name: '%s' refcnt: %d\n", + map__start(map), + map__end(map), + map__dso(map)->name, + refcount_read(map__refcnt(map))); + + return 0; +} + static int check_maps(struct map_def *merged, unsigned int size, struct maps *maps) { - struct map_rb_node *rb_node; - unsigned int i = 0; bool failed = false; if (maps__nr_maps(maps) != size) { pr_debug("Expected %d maps, got %d", size, maps__nr_maps(maps)); failed = true; } else { - maps__for_each_entry(maps, rb_node) { - struct map *map = rb_node->map; - - if (map__start(map) != merged[i].start || - map__end(map) != merged[i].end || - strcmp(map__dso(map)->name, merged[i].name) || - refcount_read(map__refcnt(map)) != 1) { - failed = true; - } - i++; - } + struct check_maps_cb_args args = { + .merged = merged, + .i = 0, + }; + failed = maps__for_each_map(maps, check_maps_cb, &args); } if (failed) { pr_debug("Expected:\n"); - for (i 
= 0; i < size; i++) { + for (unsigned int i = 0; i < size; i++) { pr_debug("\tstart: %" PRIu64 " end: %" PRIu64 " name: '%s' refcnt: 1\n", merged[i].start, merged[i].end, merged[i].name); } pr_debug("Got:\n"); - maps__for_each_entry(maps, rb_node) { - struct map *map = rb_node->map; - - pr_debug("\tstart: %" PRIu64 " end: %" PRIu64 " name: '%s' refcnt: %d\n", - map__start(map), - map__end(map), - map__dso(map)->name, - refcount_read(map__refcnt(map))); - } + maps__for_each_map(maps, failed_cb, NULL); } return failed ? TEST_FAIL : TEST_OK; } diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 886a13a77a1624022f9165837a7787f87ca709c4..012c8ae439fdcf56cd6b2492d7460de5110cc708 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -52,7 +52,7 @@ static int test__basic_mmap(struct test_suite *test __maybe_unused, int subtest return -1; } - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); if (cpus == NULL) { pr_debug("perf_cpu_map__new\n"); goto out_free_threads; diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c index f3275be83a3382ee1437ea671274f47fad60eca5..fb114118c87640b848bbd0243a7810cc2877b032 100644 --- a/tools/perf/tests/openat-syscall-all-cpus.c +++ b/tools/perf/tests/openat-syscall-all-cpus.c @@ -37,7 +37,7 @@ static int test__openat_syscall_event_on_all_cpus(struct test_suite *test __mayb return -1; } - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); if (cpus == NULL) { pr_debug("perf_cpu_map__new\n"); goto out_thread_map_delete; diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index f78be21a5999b699bfa12ce52c2592b56c4d75ef..fbdf710d5eea06047784aef245438b87442451d9 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -162,6 +162,22 @@ static int test__checkevent_numeric(struct evlist *evlist) return TEST_OK; } + +static int assert_hw(struct 
perf_evsel *evsel, enum perf_hw_id id, const char *name) +{ + struct perf_pmu *pmu; + + if (evsel->attr.type == PERF_TYPE_HARDWARE) { + TEST_ASSERT_VAL("wrong config", test_perf_config(evsel, id)); + return 0; + } + pmu = perf_pmus__find_by_type(evsel->attr.type); + + TEST_ASSERT_VAL("unexpected PMU type", pmu); + TEST_ASSERT_VAL("PMU missing event", perf_pmu__have_event(pmu, name)); + return 0; +} + static int test__checkevent_symbolic_name(struct evlist *evlist) { struct perf_evsel *evsel; @@ -169,10 +185,12 @@ static int test__checkevent_symbolic_name(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of entries", 0 != evlist->core.nr_entries); perf_evlist__for_each_evsel(&evlist->core, evsel) { - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); - TEST_ASSERT_VAL("wrong config", - test_perf_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); + int ret = assert_hw(evsel, PERF_COUNT_HW_INSTRUCTIONS, "instructions"); + + if (ret) + return ret; } + return TEST_OK; } @@ -183,8 +201,10 @@ static int test__checkevent_symbolic_name_config(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of entries", 0 != evlist->core.nr_entries); perf_evlist__for_each_evsel(&evlist->core, evsel) { - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); - TEST_ASSERT_VAL("wrong config", test_perf_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + int ret = assert_hw(evsel, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + + if (ret) + return ret; /* * The period value gets configured within evlist__config, * while this test executes only parse events method. @@ -861,10 +881,14 @@ static int test__group1(struct evlist *evlist) evlist__nr_groups(evlist) == num_core_entries()); for (int i = 0; i < num_core_entries(); i++) { + int ret; + /* instructions:k */ evsel = leader = (i == 0 ? 
evlist__first(evlist) : evsel__next(evsel)); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_INSTRUCTIONS, "instructions"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -878,8 +902,10 @@ static int test__group1(struct evlist *evlist) /* cycles:upp */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -907,6 +933,8 @@ static int test__group2(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of groups", 1 == evlist__nr_groups(evlist)); evlist__for_each_entry(evlist, evsel) { + int ret; + if (evsel->core.attr.type == PERF_TYPE_SOFTWARE) { /* faults + :ku modifier */ leader = evsel; @@ -939,8 +967,10 @@ static int test__group2(struct evlist *evlist) continue; } /* cycles:k */ - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -957,6 +987,7 @@ static int 
test__group2(struct evlist *evlist) static int test__group3(struct evlist *evlist __maybe_unused) { struct evsel *evsel, *group1_leader = NULL, *group2_leader = NULL; + int ret; TEST_ASSERT_VAL("wrong number of entries", evlist->core.nr_entries == (3 * perf_pmus__num_core_pmus() + 2)); @@ -1045,8 +1076,10 @@ static int test__group3(struct evlist *evlist __maybe_unused) continue; } /* instructions:u */ - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_INSTRUCTIONS, "instructions"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -1070,10 +1103,14 @@ static int test__group4(struct evlist *evlist __maybe_unused) num_core_entries() == evlist__nr_groups(evlist)); for (int i = 0; i < num_core_entries(); i++) { + int ret; + /* cycles:u + p */ evsel = leader = (i == 0 ? 
evlist__first(evlist) : evsel__next(evsel)); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -1089,8 +1126,10 @@ static int test__group4(struct evlist *evlist __maybe_unused) /* instructions:kp + p */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_INSTRUCTIONS, "instructions"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -1108,6 +1147,7 @@ static int test__group4(struct evlist *evlist __maybe_unused) static int test__group5(struct evlist *evlist __maybe_unused) { struct evsel *evsel = NULL, *leader; + int ret; TEST_ASSERT_VAL("wrong number of entries", evlist->core.nr_entries == (5 * num_core_entries())); @@ -1117,8 +1157,10 @@ static int test__group5(struct evlist *evlist __maybe_unused) for (int i = 0; i < num_core_entries(); i++) { /* cycles + G */ evsel = leader = (i == 0 ? 
evlist__first(evlist) : evsel__next(evsel)); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1133,8 +1175,10 @@ static int test__group5(struct evlist *evlist __maybe_unused) /* instructions + G */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_INSTRUCTIONS, "instructions"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1148,8 +1192,10 @@ static int test__group5(struct evlist *evlist __maybe_unused) for (int i = 0; i < num_core_entries(); i++) { /* cycles:G */ evsel = leader = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1164,8 +1210,10 @@ static int test__group5(struct evlist *evlist __maybe_unused) /* instructions:G */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - 
TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_INSTRUCTIONS, "instructions"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1178,8 +1226,10 @@ static int test__group5(struct evlist *evlist __maybe_unused) for (int i = 0; i < num_core_entries(); i++) { /* cycles */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1201,10 +1251,14 @@ static int test__group_gh1(struct evlist *evlist) evlist__nr_groups(evlist) == num_core_entries()); for (int i = 0; i < num_core_entries(); i++) { + int ret; + /* cycles + :H group modifier */ evsel = leader = (i == 0 ? 
evlist__first(evlist) : evsel__next(evsel)); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1218,8 +1272,10 @@ static int test__group_gh1(struct evlist *evlist) /* cache-misses:G + :H group modifier */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1242,10 +1298,14 @@ static int test__group_gh2(struct evlist *evlist) evlist__nr_groups(evlist) == num_core_entries()); for (int i = 0; i < num_core_entries(); i++) { + int ret; + /* cycles + :G group modifier */ evsel = leader = (i == 0 ? 
evlist__first(evlist) : evsel__next(evsel)); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1259,8 +1319,10 @@ static int test__group_gh2(struct evlist *evlist) /* cache-misses:H + :G group modifier */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1283,10 +1345,14 @@ static int test__group_gh3(struct evlist *evlist) evlist__nr_groups(evlist) == num_core_entries()); for (int i = 0; i < num_core_entries(); i++) { + int ret; + /* cycles:G + :u group modifier */ evsel = leader = (i == 0 ? 
evlist__first(evlist) : evsel__next(evsel)); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -1300,8 +1366,10 @@ static int test__group_gh3(struct evlist *evlist) /* cache-misses:H + :u group modifier */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -1324,10 +1392,14 @@ static int test__group_gh4(struct evlist *evlist) evlist__nr_groups(evlist) == num_core_entries()); for (int i = 0; i < num_core_entries(); i++) { + int ret; + /* cycles:G + :uG group modifier */ evsel = leader = (i == 0 ? 
evlist__first(evlist) : evsel__next(evsel)); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -1341,8 +1413,10 @@ static int test__group_gh4(struct evlist *evlist) /* cache-misses:H + :uG group modifier */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -1363,10 +1437,14 @@ static int test__leader_sample1(struct evlist *evlist) evlist->core.nr_entries == (3 * num_core_entries())); for (int i = 0; i < num_core_entries(); i++) { + int ret; + /* cycles - sampling group leader */ evsel = leader = (i == 0 ? 
evlist__first(evlist) : evsel__next(evsel)); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1379,8 +1457,10 @@ static int test__leader_sample1(struct evlist *evlist) /* cache-misses - not sampling */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1392,8 +1472,10 @@ static int test__leader_sample1(struct evlist *evlist) /* branch-misses - not sampling */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_BRANCH_MISSES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_BRANCH_MISSES, "branch-misses"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1415,10 +1497,14 @@ static int test__leader_sample2(struct evlist *evlist __maybe_unused) evlist->core.nr_entries == (2 * num_core_entries())); for (int i = 0; i < num_core_entries(); i++) { + int ret; + /* instructions - sampling group 
leader */ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_INSTRUCTIONS, "instructions"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -1431,8 +1517,10 @@ static int test__leader_sample2(struct evlist *evlist __maybe_unused) /* branch-misses - not sampling */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_BRANCH_MISSES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_BRANCH_MISSES, "branch-misses"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -1472,10 +1560,14 @@ static int test__pinned_group(struct evlist *evlist) evlist->core.nr_entries == (3 * num_core_entries())); for (int i = 0; i < num_core_entries(); i++) { + int ret; + /* cycles - group leader */ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong group name", !evsel->group_name); TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); /* TODO: The group modifier is not copied to the split group leader. 
*/ @@ -1484,13 +1576,18 @@ static int test__pinned_group(struct evlist *evlist) /* cache-misses - can not be pinned, but will go on with the leader */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned); /* branch-misses - ditto */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_BRANCH_MISSES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_BRANCH_MISSES, "branch-misses"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned); } return TEST_OK; @@ -1517,10 +1614,14 @@ static int test__exclusive_group(struct evlist *evlist) evlist->core.nr_entries == 3 * num_core_entries()); for (int i = 0; i < num_core_entries(); i++) { + int ret; + /* cycles - group leader */ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong group name", !evsel->group_name); TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); /* TODO: The group modifier is not copied to the split group leader. 
*/ @@ -1529,13 +1630,18 @@ static int test__exclusive_group(struct evlist *evlist) /* cache-misses - can not be pinned, but will go on with the leader */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.exclusive); /* branch-misses - ditto */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_BRANCH_MISSES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_BRANCH_MISSES, "branch-misses"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.exclusive); } return TEST_OK; @@ -1677,9 +1783,11 @@ static int test__checkevent_raw_pmu(struct evlist *evlist) static int test__sym_event_slash(struct evlist *evlist) { struct evsel *evsel = evlist__first(evlist); + int ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + + if (ret) + return ret; - TEST_ASSERT_VAL("wrong type", evsel->core.attr.type == PERF_TYPE_HARDWARE); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); return TEST_OK; } @@ -1687,9 +1795,11 @@ static int test__sym_event_slash(struct evlist *evlist) static int test__sym_event_dc(struct evlist *evlist) { struct evsel *evsel = evlist__first(evlist); + int ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + + if (ret) + return ret; - TEST_ASSERT_VAL("wrong type", evsel->core.attr.type == PERF_TYPE_HARDWARE); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); return TEST_OK; } @@ -1697,9 +1807,11 @@ static int test__sym_event_dc(struct evlist *evlist) static int 
test__term_equal_term(struct evlist *evlist) { struct evsel *evsel = evlist__first(evlist); + int ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + + if (ret) + return ret; - TEST_ASSERT_VAL("wrong type", evsel->core.attr.type == PERF_TYPE_HARDWARE); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "name") == 0); return TEST_OK; } @@ -1707,9 +1819,11 @@ static int test__term_equal_term(struct evlist *evlist) static int test__term_equal_legacy(struct evlist *evlist) { struct evsel *evsel = evlist__first(evlist); + int ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + + if (ret) + return ret; - TEST_ASSERT_VAL("wrong type", evsel->core.attr.type == PERF_TYPE_HARDWARE); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "l1d") == 0); return TEST_OK; } @@ -2549,7 +2663,7 @@ static int test__pmu_events(struct test_suite *test __maybe_unused, int subtest if (strchr(ent->d_name, '.')) continue; - /* exclude parametrized ones (name contains '?') */ + /* exclude parameterized ones (name contains '?') */ n = snprintf(pmu_event, sizeof(pmu_event), "%s%s", path, ent->d_name); if (n >= PATH_MAX) { pr_err("pmu event name crossed PATH_MAX(%d) size\n", PATH_MAX); @@ -2578,7 +2692,7 @@ static int test__pmu_events(struct test_suite *test __maybe_unused, int subtest fclose(file); if (is_event_parameterized == 1) { - pr_debug("skipping parametrized PMU event: %s which contains ?\n", pmu_event); + pr_debug("skipping parameterized PMU event: %s which contains ?\n", pmu_event); continue; } diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c index efcd71c2738afb9d93809e8bab18eb5dd3090195..bbe2ddeb9b745c0c8d51dc842afcfd603635fe4c 100644 --- a/tools/perf/tests/perf-time-to-tsc.c +++ b/tools/perf/tests/perf-time-to-tsc.c @@ -93,7 +93,7 @@ static 
int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su threads = thread_map__new(-1, getpid(), UINT_MAX); CHECK_NOT_NULL__(threads); - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); CHECK_NOT_NULL__(cpus); evlist = evlist__new(); diff --git a/tools/perf/tests/shell/coresight/memcpy_thread/memcpy_thread.c b/tools/perf/tests/shell/coresight/memcpy_thread/memcpy_thread.c index a7e169d1bf645e302af1c9e3a10544ffcd0af76f..5f886cd09e6b3a62b5690dade94f1f8cae3279d2 100644 --- a/tools/perf/tests/shell/coresight/memcpy_thread/memcpy_thread.c +++ b/tools/perf/tests/shell/coresight/memcpy_thread/memcpy_thread.c @@ -42,7 +42,6 @@ static pthread_t new_thr(void *(*fn) (void *arg), void *arg) int main(int argc, char **argv) { unsigned long i, len, size, thr; - pthread_t threads[256]; struct args args[256]; long long v; diff --git a/tools/perf/tests/shell/coresight/thread_loop/thread_loop.c b/tools/perf/tests/shell/coresight/thread_loop/thread_loop.c index c0158fac7d0b0b47ebfc5dac1a8e3aa72d781981..e05a559253ca9d9366ad321d520349042fb07fca 100644 --- a/tools/perf/tests/shell/coresight/thread_loop/thread_loop.c +++ b/tools/perf/tests/shell/coresight/thread_loop/thread_loop.c @@ -57,7 +57,6 @@ static pthread_t new_thr(void *(*fn) (void *arg), void *arg) int main(int argc, char **argv) { unsigned int i, len, thr; - pthread_t threads[256]; struct args args[256]; if (argc < 3) { diff --git a/tools/perf/tests/shell/coresight/unroll_loop_thread/unroll_loop_thread.c b/tools/perf/tests/shell/coresight/unroll_loop_thread/unroll_loop_thread.c index 8f6d384208ed971debc09d296006e7408a790b2b..0fc7bf1a25af3607b40f091f62176134ddb7f9f6 100644 --- a/tools/perf/tests/shell/coresight/unroll_loop_thread/unroll_loop_thread.c +++ b/tools/perf/tests/shell/coresight/unroll_loop_thread/unroll_loop_thread.c @@ -51,7 +51,6 @@ static pthread_t new_thr(void *(*fn) (void *arg), void *arg) int main(int argc, char **argv) { unsigned int i, thr; - pthread_t threads[256]; 
struct args args[256]; if (argc < 2) { diff --git a/tools/perf/tests/shell/diff.sh b/tools/perf/tests/shell/diff.sh new file mode 100755 index 0000000000000000000000000000000000000000..14b87af88703b09ab7eca336bf6314a0249d3beb --- /dev/null +++ b/tools/perf/tests/shell/diff.sh @@ -0,0 +1,108 @@ +#!/bin/sh +# perf diff tests +# SPDX-License-Identifier: GPL-2.0 + +set -e + +err=0 +perfdata1=$(mktemp /tmp/__perf_test.perf.data.XXXXX) +perfdata2=$(mktemp /tmp/__perf_test.perf.data.XXXXX) +perfdata3=$(mktemp /tmp/__perf_test.perf.data.XXXXX) +testprog="perf test -w thloop" + +shelldir=$(dirname "$0") +# shellcheck source=lib/perf_has_symbol.sh +. "${shelldir}"/lib/perf_has_symbol.sh + +testsym="test_loop" + +skip_test_missing_symbol ${testsym} + +cleanup() { + rm -rf "${perfdata1}" + rm -rf "${perfdata1}".old + rm -rf "${perfdata2}" + rm -rf "${perfdata2}".old + rm -rf "${perfdata3}" + rm -rf "${perfdata3}".old + + trap - EXIT TERM INT +} + +trap_cleanup() { + cleanup + exit 1 +} +trap trap_cleanup EXIT TERM INT + +make_data() { + file="$1" + if ! perf record -o "${file}" ${testprog} 2> /dev/null + then + echo "Workload record [Failed record]" >&2 + echo 1 + return + fi + if ! perf report -i "${file}" -q | grep -q "${testsym}" + then + echo "Workload record [Failed missing output]" >&2 + echo 1 + return + fi + echo 0 +} + +test_two_files() { + echo "Basic two file diff test" + err=$(make_data "${perfdata1}") + if [ $err != 0 ] + then + return + fi + err=$(make_data "${perfdata2}") + if [ $err != 0 ] + then + return + fi + + if !
perf diff "${perfdata1}" "${perfdata2}" | grep -q "${testsym}" + then + echo "Basic two file diff test [Failed diff]" + err=1 + return + fi + echo "Basic two file diff test [Success]" +} + +test_three_files() { + echo "Basic three file diff test" + err=$(make_data "${perfdata1}") + if [ $err != 0 ] + then + return + fi + err=$(make_data "${perfdata2}") + if [ $err != 0 ] + then + return + fi + err=$(make_data "${perfdata3}") + if [ $err != 0 ] + then + return + fi + + if ! perf diff "${perfdata1}" "${perfdata2}" "${perfdata3}" | grep -q "${testsym}" + then + echo "Basic three file diff test [Failed diff]" + err=1 + return + fi + echo "Basic three file diff test [Success]" +} + +test_two_files +test_three_files + +cleanup +exit $err diff --git a/tools/perf/tests/shell/lib/perf_has_symbol.sh b/tools/perf/tests/shell/lib/perf_has_symbol.sh new file mode 100644 index 0000000000000000000000000000000000000000..5d59c32ae3e7ba63350e431c5d0bbfa29734853c --- /dev/null +++ b/tools/perf/tests/shell/lib/perf_has_symbol.sh @@ -0,0 +1,21 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +perf_has_symbol() +{ + if perf test -vv "Symbols" 2>&1 | grep "[[:space:]]$1$"; then + echo "perf does have symbol '$1'" + return 0 + fi + echo "perf does not have symbol '$1'" + return 1 +} + +skip_test_missing_symbol() +{ + if ! 
perf_has_symbol "$1" ; then + echo "perf is missing symbols - skipping test" + exit 2 + fi + return 0 +} diff --git a/tools/perf/tests/shell/lib/setup_python.sh b/tools/perf/tests/shell/lib/setup_python.sh new file mode 100644 index 0000000000000000000000000000000000000000..c2fce1793538db8c5a5ef2d321c8b696de58f294 --- /dev/null +++ b/tools/perf/tests/shell/lib/setup_python.sh @@ -0,0 +1,16 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +if [ "x$PYTHON" = "x" ] +then + python3 --version >/dev/null 2>&1 && PYTHON=python3 +fi +if [ "x$PYTHON" = "x" ] +then + python --version >/dev/null 2>&1 && PYTHON=python +fi +if [ "x$PYTHON" = "x" ] +then + echo Skipping test, python not detected please set environment variable PYTHON. + exit 2 +fi diff --git a/tools/perf/tests/shell/list.sh b/tools/perf/tests/shell/list.sh new file mode 100755 index 0000000000000000000000000000000000000000..22b004f2b23ec6bb2c7748c6b804c292c276daf6 --- /dev/null +++ b/tools/perf/tests/shell/list.sh @@ -0,0 +1,19 @@ +#!/bin/sh +# perf list tests +# SPDX-License-Identifier: GPL-2.0 + +set -e +err=0 + +shelldir=$(dirname "$0") +# shellcheck source=lib/setup_python.sh +. "${shelldir}"/lib/setup_python.sh + +test_list_json() { + echo "Json output test" + perf list -j | $PYTHON -m json.tool + echo "Json output test [Success]" +} + +test_list_json +exit $err diff --git a/tools/perf/tests/shell/pipe_test.sh b/tools/perf/tests/shell/pipe_test.sh index 8dd115dd35a7e1d0e57b5907459d0e995221e9c2..a78d35d2cff070d731769e1f8c73cb54354d7835 100755 --- a/tools/perf/tests/shell/pipe_test.sh +++ b/tools/perf/tests/shell/pipe_test.sh @@ -2,10 +2,17 @@ # perf pipe recording and injection test # SPDX-License-Identifier: GPL-2.0 +shelldir=$(dirname "$0") +# shellcheck source=lib/perf_has_symbol.sh +. "${shelldir}"/lib/perf_has_symbol.sh + +sym="noploop" + +skip_test_missing_symbol ${sym} + data=$(mktemp /tmp/perf.data.XXXXXX) prog="perf test -w noploop" task="perf" -sym="noploop" if ! 
perf record -e task-clock:u -o - ${prog} | perf report -i - --task | grep ${task}; then echo "cannot find the test file in the perf report" diff --git a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh index eebeea6bdc767a73168c81cd64bd13d049060508..72c65570db378c74d562d6d361faa2d4c01760fd 100755 --- a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh +++ b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh @@ -45,7 +45,10 @@ trace_libc_inet_pton_backtrace() { ;; ppc64|ppc64le) eventattr='max-stack=4' - echo "gaih_inet.*\+0x[[:xdigit:]]+[[:space:]]\($libc\)$" >> $expected + # Add gaih_inet to expected backtrace only if it is part of libc. + if nm $libc | grep -F -q gaih_inet.; then + echo "gaih_inet.*\+0x[[:xdigit:]]+[[:space:]]\($libc\)$" >> $expected + fi echo "getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc\)$" >> $expected echo ".*(\+0x[[:xdigit:]]+|\[unknown\])[[:space:]]\(.*/bin/ping.*\)$" >> $expected ;; diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh index 29443b8e8876502aa5cb7e291dcb0e104f8a3da1..3d1a7759a7b2da83fb9742e62b9021713a43f21c 100755 --- a/tools/perf/tests/shell/record.sh +++ b/tools/perf/tests/shell/record.sh @@ -8,10 +8,19 @@ shelldir=$(dirname "$0") # shellcheck source=lib/waiting.sh . "${shelldir}"/lib/waiting.sh +# shellcheck source=lib/perf_has_symbol.sh +. 
"${shelldir}"/lib/perf_has_symbol.sh + +testsym="test_loop" + +skip_test_missing_symbol ${testsym} + err=0 perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX) testprog="perf test -w thloop" -testsym="test_loop" +cpu_pmu_dir="/sys/bus/event_source/devices/cpu*" +br_cntr_file="/caps/branch_counter_nr" +br_cntr_output="branch stack counters" cleanup() { rm -rf "${perfdata}" @@ -155,10 +164,37 @@ test_workload() { echo "Basic target workload test [Success]" } +test_branch_counter() { + echo "Basic branch counter test" + # Check if the branch counter feature is supported + for dir in $cpu_pmu_dir + do + if [ ! -e "$dir$br_cntr_file" ] + then + echo "branch counter feature not supported on all core PMUs ($dir) [Skipped]" + return + fi + done + if ! perf record -o "${perfdata}" -j any,counter ${testprog} 2> /dev/null + then + echo "Basic branch counter test [Failed record]" + err=1 + return + fi + if ! perf report -i "${perfdata}" -D -q | grep -q "$br_cntr_output" + then + echo "Basic branch counter test [Failed missing output]" + err=1 + return + fi + echo "Basic branch counter test [Success]" +} + test_per_thread test_register_capture test_system_wide test_workload +test_branch_counter cleanup exit $err diff --git a/tools/perf/tests/shell/record_offcpu.sh b/tools/perf/tests/shell/record_offcpu.sh index a1ef8f0d2b5cc1c5f1257e159b696d294875f874..67c925f3a15aa7fd78e0a1b779ce4c85e80246be 100755 --- a/tools/perf/tests/shell/record_offcpu.sh +++ b/tools/perf/tests/shell/record_offcpu.sh @@ -77,9 +77,9 @@ test_offcpu_child() { err=1 return fi - # each process waits for read and write, so it should be more than 800 events + # each process waits at least for poll, so it should be more than 400 events if !
perf report -i ${perfdata} -s comm -q -n -t ';' --percent-limit=90 | \ - awk -F ";" '{ if (NF > 3 && int($3) < 800) exit 1; }' + awk -F ";" '{ if (NF > 3 && int($3) < 400) exit 1; }' then echo "Child task off-cpu test [Failed invalid output]" err=1 diff --git a/tools/perf/tests/shell/script.sh b/tools/perf/tests/shell/script.sh new file mode 100755 index 0000000000000000000000000000000000000000..5ae7bd0031a8226ab7e1f38ed4869e9058f0cf1d --- /dev/null +++ b/tools/perf/tests/shell/script.sh @@ -0,0 +1,66 @@ +#!/bin/sh +# perf script tests +# SPDX-License-Identifier: GPL-2.0 + +set -e + +temp_dir=$(mktemp -d /tmp/perf-test-script.XXXXXXXXXX) + +perfdatafile="${temp_dir}/perf.data" +db_test="${temp_dir}/db_test.py" + +err=0 + +cleanup() +{ + trap - EXIT TERM INT + sane=$(echo "${temp_dir}" | cut -b 1-21) + if [ "${sane}" = "/tmp/perf-test-script" ] ; then + echo "--- Cleaning up ---" + rm -f "${temp_dir}/"* + rmdir "${temp_dir}" + fi +} + +trap_cleanup() +{ + cleanup + exit 1 +} + +trap trap_cleanup EXIT TERM INT + + +test_db() +{ + echo "DB test" + + # Check if python script is supported + libpython=$(perf version --build-options | grep python | grep -cv OFF) + if [ "${libpython}" != "1" ] ; then + echo "SKIP: python scripting is not supported" + err=2 + return + fi + + cat << "_end_of_file_" > "${db_test}" +perf_db_export_mode = True +perf_db_export_calls = False +perf_db_export_callchains = True + +def sample_table(*args): + print(f'sample_table({args})') + +def call_path_table(*args): + print(f'call_path_table({args})') +_end_of_file_ + perf record -g -o "${perfdatafile}" true + perf script -i "${perfdatafile}" -s "${db_test}" + echo "DB test [Success]" +} + +test_db + +cleanup + +exit $err diff --git a/tools/perf/tests/shell/stat+json_output.sh b/tools/perf/tests/shell/stat+json_output.sh index 196e22672c50cf6572db3b08c8974c75abb1ae56..3bc900533a5d65e5f7c3495022802857da517388 100755 --- a/tools/perf/tests/shell/stat+json_output.sh +++
b/tools/perf/tests/shell/stat+json_output.sh @@ -8,20 +8,10 @@ set -e skip_test=0 +shelldir=$(dirname "$0") +# shellcheck source=lib/setup_python.sh +. "${shelldir}"/lib/setup_python.sh pythonchecker=$(dirname $0)/lib/perf_json_output_lint.py -if [ "x$PYTHON" == "x" ] -then - if which python3 > /dev/null - then - PYTHON=python3 - elif which python > /dev/null - then - PYTHON=python - else - echo Skipping test, python not detected please set environment variable PYTHON. - exit 2 - fi -fi stat_output=$(mktemp /tmp/__perf_test.stat_output.json.XXXXX) diff --git a/tools/perf/tests/shell/stat_all_pmu.sh b/tools/perf/tests/shell/stat_all_pmu.sh index c77955419173190216d9af049bb88bd3ab1bcc1e..d2a3506e0d196c997d7eb663d6da06b530ea5681 100755 --- a/tools/perf/tests/shell/stat_all_pmu.sh +++ b/tools/perf/tests/shell/stat_all_pmu.sh @@ -4,7 +4,7 @@ set -e -# Test all PMU events; however exclude parametrized ones (name contains '?') +# Test all PMU events; however exclude parameterized ones (name contains '?') for p in $(perf list --raw-dump pmu | sed 's/[[:graph:]]\+?[[:graph:]]\+[[:space:]]//g'); do echo "Testing $p" result=$(perf stat -e "$p" true 2>&1) diff --git a/tools/perf/tests/shell/stat_metrics_values.sh b/tools/perf/tests/shell/stat_metrics_values.sh index ad94c936de7e878173e95ee949ae9146aa295239..7ca172599aa6cdac7adb47d16716ff3ba3746e63 100755 --- a/tools/perf/tests/shell/stat_metrics_values.sh +++ b/tools/perf/tests/shell/stat_metrics_values.sh @@ -1,16 +1,10 @@ #!/bin/bash # perf metrics value validation # SPDX-License-Identifier: GPL-2.0 -if [ "x$PYTHON" == "x" ] -then - if which python3 > /dev/null - then - PYTHON=python3 - else - echo Skipping test, python3 not detected please set environment variable PYTHON. - exit 2 - fi -fi + +shelldir=$(dirname "$0") +# shellcheck source=lib/setup_python.sh +. 
"${shelldir}"/lib/setup_python.sh grep -q GenuineIntel /proc/cpuinfo || { echo Skipping non-Intel; exit 2; } diff --git a/tools/perf/tests/shell/test_arm_callgraph_fp.sh b/tools/perf/tests/shell/test_arm_callgraph_fp.sh index 66dfdfdad553f4c6b580e928d8b870840b831269..e342e6c8aa50c41ddb86730e263c321907800d73 100755 --- a/tools/perf/tests/shell/test_arm_callgraph_fp.sh +++ b/tools/perf/tests/shell/test_arm_callgraph_fp.sh @@ -2,8 +2,14 @@ # Check Arm64 callgraphs are complete in fp mode # SPDX-License-Identifier: GPL-2.0 +shelldir=$(dirname "$0") +# shellcheck source=lib/perf_has_symbol.sh +. "${shelldir}"/lib/perf_has_symbol.sh + lscpu | grep -q "aarch64" || exit 2 +skip_test_missing_symbol leafloop + PERF_DATA=$(mktemp /tmp/__perf_test.perf.data.XXXXX) TEST_PROGRAM="perf test -w leafloop" diff --git a/tools/perf/tests/shell/test_brstack.sh b/tools/perf/tests/shell/test_brstack.sh index 09908d71c9941d3c4e3cb1d81cc08617581e13d8..5f14d0cb013f838629446abc4f15484edb2cd7d8 100755 --- a/tools/perf/tests/shell/test_brstack.sh +++ b/tools/perf/tests/shell/test_brstack.sh @@ -4,6 +4,10 @@ # SPDX-License-Identifier: GPL-2.0 # German Gomez , 2022 +shelldir=$(dirname "$0") +# shellcheck source=lib/perf_has_symbol.sh +. "${shelldir}"/lib/perf_has_symbol.sh + # skip the test if the hardware doesn't support branch stack sampling # and if the architecture doesn't support filter types: any,save_type,u if ! perf record -o- --no-buildid --branch-filter any,save_type,u -- true > /dev/null 2>&1 ; then @@ -11,6 +15,8 @@ if ! 
perf record -o- --no-buildid --branch-filter any,save_type,u -- true > /dev exit 2 fi +skip_test_missing_symbol brstack_bench + TMPDIR=$(mktemp -d /tmp/__perf_test.program.XXXXX) TESTPROG="perf test -w brstack" diff --git a/tools/perf/tests/shell/test_data_symbol.sh b/tools/perf/tests/shell/test_data_symbol.sh index 69bb6fe86c5078a8325dedbcc2ca045a6d552f2a..3dfa91832aa87f89b8f0ef0c1ba51df6d130d2d2 100755 --- a/tools/perf/tests/shell/test_data_symbol.sh +++ b/tools/perf/tests/shell/test_data_symbol.sh @@ -4,6 +4,13 @@ # SPDX-License-Identifier: GPL-2.0 # Leo Yan , 2022 +shelldir=$(dirname "$0") +# shellcheck source=lib/waiting.sh +. "${shelldir}"/lib/waiting.sh + +# shellcheck source=lib/perf_has_symbol.sh +. "${shelldir}"/lib/perf_has_symbol.sh + skip_if_no_mem_event() { perf mem record -e list 2>&1 | grep -E -q 'available' && return 0 return 2 @@ -11,8 +18,11 @@ skip_if_no_mem_event() { skip_if_no_mem_event || exit 2 +skip_test_missing_symbol buf1 + TEST_PROGRAM="perf test -w datasym" PERF_DATA=$(mktemp /tmp/__perf_test.perf.data.XXXXX) +ERR_FILE=$(mktemp /tmp/__perf_test.stderr.XXXXX) check_result() { # The memory report format is as below: @@ -50,13 +60,15 @@ echo "Recording workload..." # specific CPU and test in per-CPU mode. is_amd=$(grep -E -c 'vendor_id.*AuthenticAMD' /proc/cpuinfo) if (($is_amd >= 1)); then - perf mem record -o ${PERF_DATA} -C 0 -- taskset -c 0 $TEST_PROGRAM & + perf mem record -vvv -o ${PERF_DATA} -C 0 -- taskset -c 0 $TEST_PROGRAM 2>"${ERR_FILE}" & else - perf mem record --all-user -o ${PERF_DATA} -- $TEST_PROGRAM & + perf mem record -vvv --all-user -o ${PERF_DATA} -- $TEST_PROGRAM 2>"${ERR_FILE}" & fi PERFPID=$! 
+wait_for_perf_to_start ${PERFPID} "${ERR_FILE}" + sleep 1 kill $PERFPID diff --git a/tools/perf/tests/shell/test_perf_data_converter_json.sh b/tools/perf/tests/shell/test_perf_data_converter_json.sh index 6ded58f98f55b26f6f538cf5f457007625be7700..c4f1b59d116f6e4705d872824339b234180e206f 100755 --- a/tools/perf/tests/shell/test_perf_data_converter_json.sh +++ b/tools/perf/tests/shell/test_perf_data_converter_json.sh @@ -6,16 +6,9 @@ set -e err=0 -if [ "$PYTHON" = "" ] ; then - if which python3 > /dev/null ; then - PYTHON=python3 - elif which python > /dev/null ; then - PYTHON=python - else - echo Skipping test, python not detected please set environment variable PYTHON. - exit 2 - fi -fi +shelldir=$(dirname "$0") +# shellcheck source=lib/setup_python.sh +. "${shelldir}"/lib/setup_python.sh perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX) result=$(mktemp /tmp/__perf_test.output.json.XXXXX) diff --git a/tools/perf/tests/sigtrap.c b/tools/perf/tests/sigtrap.c index 1de7478ec1894d7799d723f09f9384937710bf40..e6fd934b027a3d0ca36f434135cbdc245acd666d 100644 --- a/tools/perf/tests/sigtrap.c +++ b/tools/perf/tests/sigtrap.c @@ -57,36 +57,79 @@ static struct perf_event_attr make_event_attr(void) #ifdef HAVE_BPF_SKEL #include -static bool attr_has_sigtrap(void) +static struct btf *btf; + +static bool btf__available(void) { - bool ret = false; - struct btf *btf; - const struct btf_type *t; + if (btf == NULL) + btf = btf__load_vmlinux_btf(); + + return btf != NULL; +} + +static void btf__exit(void) +{ + btf__free(btf); + btf = NULL; +} + +static const struct btf_member *__btf_type__find_member_by_name(int type_id, const char *member_name) +{ + const struct btf_type *t = btf__type_by_id(btf, type_id); const struct btf_member *m; - const char *name; - int i, id; + int i; + + for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) { + const char *current_member_name = btf__name_by_offset(btf, m->name_off); + if (!strcmp(current_member_name, member_name)) + return m; + } - 
btf = btf__load_vmlinux_btf(); - if (btf == NULL) { + return NULL; +} + +static bool attr_has_sigtrap(void) +{ + int id; + + if (!btf__available()) { /* should be an old kernel */ return false; } id = btf__find_by_name_kind(btf, "perf_event_attr", BTF_KIND_STRUCT); if (id < 0) - goto out; + return false; - t = btf__type_by_id(btf, id); - for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) { - name = btf__name_by_offset(btf, m->name_off); - if (!strcmp(name, "sigtrap")) { - ret = true; - break; - } - } -out: - btf__free(btf); - return ret; + return __btf_type__find_member_by_name(id, "sigtrap") != NULL; +} + +static bool kernel_with_sleepable_spinlocks(void) +{ + const struct btf_member *member; + const struct btf_type *type; + const char *type_name; + int id; + + if (!btf__available()) + return false; + + id = btf__find_by_name_kind(btf, "spinlock", BTF_KIND_STRUCT); + if (id < 0) + return false; + + // Only RT has a "lock" member for "struct spinlock" + member = __btf_type__find_member_by_name(id, "lock"); + if (member == NULL) + return false; + + // But check its type as well + type = btf__type_by_id(btf, member->type); + if (!type || !btf_is_struct(type)) + return false; + + type_name = btf__name_by_offset(btf, type->name_off); + return type_name && !strcmp(type_name, "rt_mutex_base"); } #else /* !HAVE_BPF_SKEL */ static bool attr_has_sigtrap(void) @@ -109,6 +152,15 @@ static bool attr_has_sigtrap(void) return ret; } + +static bool kernel_with_sleepable_spinlocks(void) +{ + return false; +} + +static void btf__exit(void) +{ +} #endif /* HAVE_BPF_SKEL */ static void @@ -147,7 +199,7 @@ static int run_test_threads(pthread_t *threads, pthread_barrier_t *barrier) static int run_stress_test(int fd, pthread_t *threads, pthread_barrier_t *barrier) { - int ret; + int ret, expected_sigtraps; ctx.iterate_on = 3000; @@ -156,7 +208,16 @@ static int run_stress_test(int fd, pthread_t *threads, pthread_barrier_t *barrie ret = run_test_threads(threads, barrier); 
TEST_ASSERT_EQUAL("disable failed", ioctl(fd, PERF_EVENT_IOC_DISABLE, 0), 0); - TEST_ASSERT_EQUAL("unexpected sigtraps", ctx.signal_count, NUM_THREADS * ctx.iterate_on); + expected_sigtraps = NUM_THREADS * ctx.iterate_on; + + if (ctx.signal_count < expected_sigtraps && kernel_with_sleepable_spinlocks()) { + pr_debug("Expected %d sigtraps, got %d, running on a kernel with sleepable spinlocks.\n", + expected_sigtraps, ctx.signal_count); + pr_debug("See https://lore.kernel.org/all/e368f2c848d77fbc8d259f44e2055fe469c219cf.camel@gmx.de/\n"); + return TEST_SKIP; + } else + TEST_ASSERT_EQUAL("unexpected sigtraps", ctx.signal_count, expected_sigtraps); + TEST_ASSERT_EQUAL("missing signals or incorrectly delivered", ctx.tids_want_signal, 0); TEST_ASSERT_VAL("unexpected si_addr", ctx.first_siginfo.si_addr == &ctx.iterate_on); #if 0 /* FIXME: enable when libc's signal.h has si_perf_{type,data} */ @@ -221,6 +282,7 @@ out_restore_sigaction: sigaction(SIGTRAP, &oldact, NULL); out: pthread_barrier_destroy(&barrier); + btf__exit(); return ret; } diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index 4d7493fa01059112ff283e36b46e22f95839ecdf..290716783ac6a28d06567f7827ccb1bf68ffb135 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c @@ -62,7 +62,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) } evlist__add(evlist, evsel); - cpus = perf_cpu_map__dummy_new(); + cpus = perf_cpu_map__new_any_cpu(); threads = thread_map__new_by_tid(getpid()); if (!cpus || !threads) { err = -ENOMEM; diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index e52b031bedc5a9b545fe9b6c8d3f93b6aaefd03d..5cab17a1942e67d7767161a21fdee07599912d61 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -351,7 +351,7 @@ static int test__switch_tracking(struct test_suite *test __maybe_unused, int sub goto out_err; } - cpus = perf_cpu_map__new(NULL); + cpus = 
perf_cpu_map__new_online_cpus(); if (!cpus) { pr_debug("perf_cpu_map__new failed!\n"); goto out_err; diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index 968dddde6ddaf0bede4fc7679d57bbb4ad2cd35b..d33d0952025cf5b65e80fb7e8464f634b5c313c1 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -70,7 +70,7 @@ static int test__task_exit(struct test_suite *test __maybe_unused, int subtest _ * evlist__prepare_workload we'll fill in the only thread * we're monitoring, the one forked there. */ - cpus = perf_cpu_map__dummy_new(); + cpus = perf_cpu_map__new_any_cpu(); threads = thread_map__new_by_tid(-1); if (!cpus || !threads) { err = -ENOMEM; diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index b394f3ac2d667bacb2d9e16aca78715c65ab5ed3..dad3d7414142d1befc3d6eebe48d81a39ace153a 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -207,5 +207,6 @@ DECLARE_WORKLOAD(brstack); DECLARE_WORKLOAD(datasym); extern const char *dso_to_test; +extern const char *test_objdump_path; #endif /* TESTS_H */ diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index 9dee63734e66a0c1b08c62bb1bbfb393ad9866d2..2a842f53fbb575a2f715d66898ff1bb6553dfa7f 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -215,7 +215,7 @@ static int test__session_topology(struct test_suite *test __maybe_unused, int su if (session_write_header(path)) goto free_path; - map = perf_cpu_map__new(NULL); + map = perf_cpu_map__new_online_cpus(); if (map == NULL) { pr_debug("failed to get system cpumap\n"); goto free_path; diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index 1078a93b01aa018f3be1932dee7f3f694ad51828..822f893e67d5f6643f5f1794801b87630f64fca1 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c @@ -112,18 +112,92 @@ static bool is_ignored_symbol(const char *name, char type) return false; } +struct 
test__vmlinux_matches_kallsyms_cb_args { + struct machine kallsyms; + struct map *vmlinux_map; + bool header_printed; +}; + +static int test__vmlinux_matches_kallsyms_cb1(struct map *map, void *data) +{ + struct test__vmlinux_matches_kallsyms_cb_args *args = data; + struct dso *dso = map__dso(map); + /* + * If it is the kernel, kallsyms is always "[kernel.kallsyms]", while + * the kernel will have the path for the vmlinux file being used, so use + * the short name, less descriptive but the same ("[kernel]" in both + * cases. + */ + struct map *pair = maps__find_by_name(args->kallsyms.kmaps, + (dso->kernel ? dso->short_name : dso->name)); + + if (pair) + map__set_priv(pair, 1); + else { + if (!args->header_printed) { + pr_info("WARN: Maps only in vmlinux:\n"); + args->header_printed = true; + } + map__fprintf(map, stderr); + } + return 0; +} + +static int test__vmlinux_matches_kallsyms_cb2(struct map *map, void *data) +{ + struct test__vmlinux_matches_kallsyms_cb_args *args = data; + struct map *pair; + u64 mem_start = map__unmap_ip(args->vmlinux_map, map__start(map)); + u64 mem_end = map__unmap_ip(args->vmlinux_map, map__end(map)); + + pair = maps__find(args->kallsyms.kmaps, mem_start); + if (pair == NULL || map__priv(pair)) + return 0; + + if (map__start(pair) == mem_start) { + struct dso *dso = map__dso(map); + + if (!args->header_printed) { + pr_info("WARN: Maps in vmlinux with a different name in kallsyms:\n"); + args->header_printed = true; + } + + pr_info("WARN: %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s in kallsyms as", + map__start(map), map__end(map), map__pgoff(map), dso->name); + if (mem_end != map__end(pair)) + pr_info(":\nWARN: *%" PRIx64 "-%" PRIx64 " %" PRIx64, + map__start(pair), map__end(pair), map__pgoff(pair)); + pr_info(" %s\n", dso->name); + map__set_priv(pair, 1); + } + return 0; +} + +static int test__vmlinux_matches_kallsyms_cb3(struct map *map, void *data) +{ + struct test__vmlinux_matches_kallsyms_cb_args *args = data; + + if (!map__priv(map)) 
{ + if (!args->header_printed) { + pr_info("WARN: Maps only in kallsyms:\n"); + args->header_printed = true; + } + map__fprintf(map, stderr); + } + return 0; +} + static int test__vmlinux_matches_kallsyms(struct test_suite *test __maybe_unused, int subtest __maybe_unused) { int err = TEST_FAIL; struct rb_node *nd; struct symbol *sym; - struct map *kallsyms_map, *vmlinux_map; - struct map_rb_node *rb_node; - struct machine kallsyms, vmlinux; + struct map *kallsyms_map; + struct machine vmlinux; struct maps *maps; u64 mem_start, mem_end; - bool header_printed; + struct test__vmlinux_matches_kallsyms_cb_args args; /* * Step 1: @@ -131,7 +205,7 @@ static int test__vmlinux_matches_kallsyms(struct test_suite *test __maybe_unused * Init the machines that will hold kernel, modules obtained from * both vmlinux + .ko files and from /proc/kallsyms split by modules. */ - machine__init(&kallsyms, "", HOST_KERNEL_ID); + machine__init(&args.kallsyms, "", HOST_KERNEL_ID); machine__init(&vmlinux, "", HOST_KERNEL_ID); maps = machine__kernel_maps(&vmlinux); @@ -143,7 +217,7 @@ static int test__vmlinux_matches_kallsyms(struct test_suite *test __maybe_unused * load /proc/kallsyms. Also create the modules maps from /proc/modules * and find the .ko files that match them in /lib/modules/`uname -r`/. */ - if (machine__create_kernel_maps(&kallsyms) < 0) { + if (machine__create_kernel_maps(&args.kallsyms) < 0) { pr_debug("machine__create_kernel_maps failed"); err = TEST_SKIP; goto out; @@ -160,7 +234,7 @@ static int test__vmlinux_matches_kallsyms(struct test_suite *test __maybe_unused * be compacted against the list of modules found in the "vmlinux" * code and with the one got from /proc/modules from the "kallsyms" code. 
*/ - if (machine__load_kallsyms(&kallsyms, "/proc/kallsyms") <= 0) { + if (machine__load_kallsyms(&args.kallsyms, "/proc/kallsyms") <= 0) { pr_debug("machine__load_kallsyms failed"); err = TEST_SKIP; goto out; @@ -174,7 +248,7 @@ static int test__vmlinux_matches_kallsyms(struct test_suite *test __maybe_unused * to see if the running kernel was relocated by checking if it has the * same value in the vmlinux file we load. */ - kallsyms_map = machine__kernel_map(&kallsyms); + kallsyms_map = machine__kernel_map(&args.kallsyms); /* * Step 5: @@ -186,7 +260,7 @@ static int test__vmlinux_matches_kallsyms(struct test_suite *test __maybe_unused goto out; } - vmlinux_map = machine__kernel_map(&vmlinux); + args.vmlinux_map = machine__kernel_map(&vmlinux); /* * Step 6: @@ -213,7 +287,7 @@ static int test__vmlinux_matches_kallsyms(struct test_suite *test __maybe_unused * in the kallsyms dso. For the ones that are in both, check its names and * end addresses too. */ - map__for_each_symbol(vmlinux_map, sym, nd) { + map__for_each_symbol(args.vmlinux_map, sym, nd) { struct symbol *pair, *first_pair; sym = rb_entry(nd, struct symbol, rb_node); @@ -221,10 +295,10 @@ static int test__vmlinux_matches_kallsyms(struct test_suite *test __maybe_unused if (sym->start == sym->end) continue; - mem_start = map__unmap_ip(vmlinux_map, sym->start); - mem_end = map__unmap_ip(vmlinux_map, sym->end); + mem_start = map__unmap_ip(args.vmlinux_map, sym->start); + mem_end = map__unmap_ip(args.vmlinux_map, sym->end); - first_pair = machine__find_kernel_symbol(&kallsyms, mem_start, NULL); + first_pair = machine__find_kernel_symbol(&args.kallsyms, mem_start, NULL); pair = first_pair; if (pair && UM(pair->start) == mem_start) { @@ -253,7 +327,8 @@ next_pair: */ continue; } else { - pair = machine__find_kernel_symbol_by_name(&kallsyms, sym->name, NULL); + pair = machine__find_kernel_symbol_by_name(&args.kallsyms, + sym->name, NULL); if (pair) { if (UM(pair->start) == mem_start) goto next_pair; @@ -267,7 
+342,7 @@ next_pair: continue; } - } else if (mem_start == map__end(kallsyms.vmlinux_map)) { + } else if (mem_start == map__end(args.kallsyms.vmlinux_map)) { /* * Ignore aliases to _etext, i.e. to the end of the kernel text area, * such as __indirect_thunk_end. @@ -289,78 +364,18 @@ next_pair: if (verbose <= 0) goto out; - header_printed = false; - - maps__for_each_entry(maps, rb_node) { - struct map *map = rb_node->map; - struct dso *dso = map__dso(map); - /* - * If it is the kernel, kallsyms is always "[kernel.kallsyms]", while - * the kernel will have the path for the vmlinux file being used, - * so use the short name, less descriptive but the same ("[kernel]" in - * both cases. - */ - struct map *pair = maps__find_by_name(kallsyms.kmaps, (dso->kernel ? - dso->short_name : - dso->name)); - if (pair) { - map__set_priv(pair, 1); - } else { - if (!header_printed) { - pr_info("WARN: Maps only in vmlinux:\n"); - header_printed = true; - } - map__fprintf(map, stderr); - } - } - - header_printed = false; - - maps__for_each_entry(maps, rb_node) { - struct map *pair, *map = rb_node->map; - - mem_start = map__unmap_ip(vmlinux_map, map__start(map)); - mem_end = map__unmap_ip(vmlinux_map, map__end(map)); + args.header_printed = false; + maps__for_each_map(maps, test__vmlinux_matches_kallsyms_cb1, &args); - pair = maps__find(kallsyms.kmaps, mem_start); - if (pair == NULL || map__priv(pair)) - continue; - - if (map__start(pair) == mem_start) { - struct dso *dso = map__dso(map); - - if (!header_printed) { - pr_info("WARN: Maps in vmlinux with a different name in kallsyms:\n"); - header_printed = true; - } - - pr_info("WARN: %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s in kallsyms as", - map__start(map), map__end(map), map__pgoff(map), dso->name); - if (mem_end != map__end(pair)) - pr_info(":\nWARN: *%" PRIx64 "-%" PRIx64 " %" PRIx64, - map__start(pair), map__end(pair), map__pgoff(pair)); - pr_info(" %s\n", dso->name); - map__set_priv(pair, 1); - } - } - - header_printed = false; - - 
maps = machine__kernel_maps(&kallsyms); + args.header_printed = false; + maps__for_each_map(maps, test__vmlinux_matches_kallsyms_cb2, &args); - maps__for_each_entry(maps, rb_node) { - struct map *map = rb_node->map; + args.header_printed = false; + maps = machine__kernel_maps(&args.kallsyms); + maps__for_each_map(maps, test__vmlinux_matches_kallsyms_cb3, &args); - if (!map__priv(map)) { - if (!header_printed) { - pr_info("WARN: Maps only in kallsyms:\n"); - header_printed = true; - } - map__fprintf(map, stderr); - } - } out: - machine__exit(&kallsyms); + machine__exit(&args.kallsyms); machine__exit(&vmlinux); return err; } diff --git a/tools/perf/tests/workloads/thloop.c b/tools/perf/tests/workloads/thloop.c index af05269c2eb8a4a197de87bd9a7bfec00daefb14..457b29f91c3ee277429393bd4095a4de63ee7365 100644 --- a/tools/perf/tests/workloads/thloop.c +++ b/tools/perf/tests/workloads/thloop.c @@ -7,7 +7,6 @@ #include "../tests.h" static volatile sig_atomic_t done; -static volatile unsigned count; /* We want to check this symbol in perf report */ noinline void test_loop(void); @@ -19,8 +18,7 @@ static void sighandler(int sig __maybe_unused) noinline void test_loop(void) { - while (!done) - __atomic_fetch_add(&count, 1, __ATOMIC_RELAXED); + while (!done); } static void *thfunc(void *arg) diff --git a/tools/perf/trace/beauty/arch_errno_names.sh b/tools/perf/trace/beauty/arch_errno_names.sh index cc09dcaa891e04bb66e0a60cb496111acd1c9b72..7df4bf5b55a3cc2a8c5a31462129e8ac829a4e59 100755 --- a/tools/perf/trace/beauty/arch_errno_names.sh +++ b/tools/perf/trace/beauty/arch_errno_names.sh @@ -57,13 +57,13 @@ create_arch_errno_table_func() archlist="$1" default="$2" - printf 'const char *arch_syscalls__strerrno(const char *arch, int err)\n' + printf 'arch_syscalls__strerrno_t *arch_syscalls__strerrno_function(const char *arch)\n' printf '{\n' for arch in $archlist; do printf '\tif (!strcmp(arch, "%s"))\n' $(arch_string "$arch") - printf '\t\treturn errno_to_name__%s(err);\n' 
$(arch_string "$arch") + printf '\t\treturn errno_to_name__%s;\n' $(arch_string "$arch") done - printf '\treturn errno_to_name__%s(err);\n' $(arch_string "$default") + printf '\treturn errno_to_name__%s;\n' $(arch_string "$default") printf '}\n' } @@ -76,7 +76,9 @@ EoHEADER # Create list of architectures that have a specific errno.h. archlist="" -for arch in $(find $toolsdir/arch -maxdepth 1 -mindepth 1 -type d -printf "%f\n" | sort -r); do +for f in $toolsdir/arch/*/include/uapi/asm/errno.h; do + d=${f%/include/uapi/asm/errno.h} + arch="${d##*/}" test -f $toolsdir/arch/$arch/include/uapi/asm/errno.h && archlist="$archlist $arch" done diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index 788e8f6bd90eb753af7b7e2928831837c99ff789..9feb794f5c6e15f408a372665f537df5a630e5a0 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -251,6 +251,4 @@ size_t open__scnprintf_flags(unsigned long flags, char *bf, size_t size, bool sh void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg, size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg)); -const char *arch_syscalls__strerrno(const char *arch, int err); - #endif /* _PERF_TRACE_BEAUTY_H */ diff --git a/tools/perf/trace/beauty/prctl_option.sh b/tools/perf/trace/beauty/prctl_option.sh index 8059342ca4126c381a144073f169cba6bc46a059..9455d9672f140d13daa1502bf5faf3c7986a8cc5 100755 --- a/tools/perf/trace/beauty/prctl_option.sh +++ b/tools/perf/trace/beauty/prctl_option.sh @@ -4,9 +4,9 @@ [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/ printf "static const char *prctl_options[] = {\n" -regex='^#define[[:space:]]{1}PR_(\w+)[[:space:]]*([[:xdigit:]]+)([[:space:]]*\/.*)?$' +regex='^#define[[:space:]]{1}PR_(\w+)[[:space:]]*([[:xdigit:]]+)([[:space:]]*/.*)?$' grep -E $regex ${header_dir}/prctl.h | grep -v PR_SET_PTRACER | \ - sed -r "s/$regex/\2 \1/g" | \ + sed -E "s%$regex%\2 \1%g" | \ sort -n | xargs printf "\t[%s] = 
\"%s\",\n" printf "};\n" diff --git a/tools/perf/trace/beauty/socket.sh b/tools/perf/trace/beauty/socket.sh index 8bc7ba62203e4a9d3c327487027d05d4da5a21fe..670c6db298ae029812a5eaa885cb986e7a1f56b2 100755 --- a/tools/perf/trace/beauty/socket.sh +++ b/tools/perf/trace/beauty/socket.sh @@ -18,10 +18,10 @@ grep -E $ipproto_regex ${uapi_header_dir}/in.h | \ printf "};\n\n" printf "static const char *socket_level[] = {\n" -socket_level_regex='^#define[[:space:]]+SOL_(\w+)[[:space:]]+([[:digit:]]+)([[:space:]]+\/.*)?' +socket_level_regex='^#define[[:space:]]+SOL_(\w+)[[:space:]]+([[:digit:]]+)([[:space:]]+/.*)?' grep -E $socket_level_regex ${beauty_header_dir}/socket.h | \ - sed -r "s/$socket_level_regex/\2 \1/g" | \ + sed -E "s%$socket_level_regex%\2 \1%g" | \ sort -n | xargs printf "\t[%s] = \"%s\",\n" printf "};\n\n" diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index ccdb2cd11fbf0325f1e2bcfa35fa86d5986b3261..ec5e21932876038b99afbfa30560d856efd8afd5 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -27,7 +27,6 @@ struct annotate_browser { struct rb_node *curr_hot; struct annotation_line *selection; struct arch *arch; - struct annotation_options *opts; bool searching_backwards; char search_bf[128]; }; @@ -38,11 +37,10 @@ static inline struct annotation *browser__annotation(struct ui_browser *browser) return symbol__annotation(ms->sym); } -static bool disasm_line__filter(struct ui_browser *browser, void *entry) +static bool disasm_line__filter(struct ui_browser *browser __maybe_unused, void *entry) { - struct annotation *notes = browser__annotation(browser); struct annotation_line *al = list_entry(entry, struct annotation_line, node); - return annotation_line__filter(al, notes); + return annotation_line__filter(al); } static int ui_browser__jumps_percent_color(struct ui_browser *browser, int nr, bool current) @@ -97,7 +95,7 @@ static void annotate_browser__write(struct ui_browser *browser, void 
*entry, int struct annotation_write_ops ops = { .first_line = row == 0, .current_entry = is_current_entry, - .change_color = (!notes->options->hide_src_code && + .change_color = (!annotate_opts.hide_src_code && (!is_current_entry || (browser->use_navkeypressed && !browser->navkeypressed))), @@ -114,7 +112,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int if (!browser->navkeypressed) ops.width += 1; - annotation_line__write(al, notes, &ops, ab->opts); + annotation_line__write(al, notes, &ops); if (ops.current_entry) ab->selection = al; @@ -128,7 +126,7 @@ static int is_fused(struct annotate_browser *ab, struct disasm_line *cursor) while (pos && pos->al.offset == -1) { pos = list_prev_entry(pos, al.node); - if (!ab->opts->hide_src_code) + if (!annotate_opts.hide_src_code) diff++; } @@ -188,14 +186,14 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) * name right after the '<' token and probably treating this like a * 'call' instruction. 
*/ - target = notes->offsets[cursor->ops.target.offset]; + target = notes->src->offsets[cursor->ops.target.offset]; if (target == NULL) { ui_helpline__printf("WARN: jump target inconsistency, press 'o', notes->offsets[%#x] = NULL\n", cursor->ops.target.offset); return; } - if (notes->options->hide_src_code) { + if (annotate_opts.hide_src_code) { from = cursor->al.idx_asm; to = target->idx_asm; } else { @@ -224,7 +222,7 @@ static unsigned int annotate_browser__refresh(struct ui_browser *browser) int ret = ui_browser__list_head_refresh(browser); int pcnt_width = annotation__pcnt_width(notes); - if (notes->options->jump_arrows) + if (annotate_opts.jump_arrows) annotate_browser__draw_current_jump(browser); ui_browser__set_color(browser, HE_COLORSET_NORMAL); @@ -258,7 +256,7 @@ static void disasm_rb_tree__insert(struct annotate_browser *browser, parent = *p; l = rb_entry(parent, struct annotation_line, rb_node); - if (disasm__cmp(al, l, browser->opts->percent_type) < 0) + if (disasm__cmp(al, l, annotate_opts.percent_type) < 0) p = &(*p)->rb_left; else p = &(*p)->rb_right; @@ -270,7 +268,6 @@ static void disasm_rb_tree__insert(struct annotate_browser *browser, static void annotate_browser__set_top(struct annotate_browser *browser, struct annotation_line *pos, u32 idx) { - struct annotation *notes = browser__annotation(&browser->b); unsigned back; ui_browser__refresh_dimensions(&browser->b); @@ -280,7 +277,7 @@ static void annotate_browser__set_top(struct annotate_browser *browser, while (browser->b.top_idx != 0 && back != 0) { pos = list_entry(pos->node.prev, struct annotation_line, node); - if (annotation_line__filter(pos, notes)) + if (annotation_line__filter(pos)) continue; --browser->b.top_idx; @@ -294,11 +291,10 @@ static void annotate_browser__set_top(struct annotate_browser *browser, static void annotate_browser__set_rb_top(struct annotate_browser *browser, struct rb_node *nd) { - struct annotation *notes = browser__annotation(&browser->b); struct annotation_line 
* pos = rb_entry(nd, struct annotation_line, rb_node); u32 idx = pos->idx; - if (notes->options->hide_src_code) + if (annotate_opts.hide_src_code) idx = pos->idx_asm; annotate_browser__set_top(browser, pos, idx); browser->curr_hot = nd; @@ -331,13 +327,13 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, double percent; percent = annotation_data__percent(&pos->al.data[i], - browser->opts->percent_type); + annotate_opts.percent_type); if (max_percent < percent) max_percent = percent; } - if (max_percent < 0.01 && pos->al.ipc == 0) { + if (max_percent < 0.01 && (!pos->al.cycles || pos->al.cycles->ipc == 0)) { RB_CLEAR_NODE(&pos->al.rb_node); continue; } @@ -380,12 +376,12 @@ static bool annotate_browser__toggle_source(struct annotate_browser *browser) browser->b.seek(&browser->b, offset, SEEK_CUR); al = list_entry(browser->b.top, struct annotation_line, node); - if (notes->options->hide_src_code) { + if (annotate_opts.hide_src_code) { if (al->idx_asm < offset) offset = al->idx; - browser->b.nr_entries = notes->nr_entries; - notes->options->hide_src_code = false; + browser->b.nr_entries = notes->src->nr_entries; + annotate_opts.hide_src_code = false; browser->b.seek(&browser->b, -offset, SEEK_CUR); browser->b.top_idx = al->idx - offset; browser->b.index = al->idx; @@ -402,8 +398,8 @@ static bool annotate_browser__toggle_source(struct annotate_browser *browser) if (al->idx_asm < offset) offset = al->idx_asm; - browser->b.nr_entries = notes->nr_asm_entries; - notes->options->hide_src_code = true; + browser->b.nr_entries = notes->src->nr_asm_entries; + annotate_opts.hide_src_code = true; browser->b.seek(&browser->b, -offset, SEEK_CUR); browser->b.top_idx = al->idx_asm - offset; browser->b.index = al->idx_asm; @@ -435,7 +431,7 @@ static void ui_browser__init_asm_mode(struct ui_browser *browser) { struct annotation *notes = browser__annotation(browser); ui_browser__reset_index(browser); - browser->nr_entries = notes->nr_asm_entries; + 
browser->nr_entries = notes->src->nr_asm_entries; } static int sym_title(struct symbol *sym, struct map *map, char *title, @@ -483,8 +479,8 @@ static bool annotate_browser__callq(struct annotate_browser *browser, target_ms.map = ms->map; target_ms.sym = dl->ops.target.sym; annotation__unlock(notes); - symbol__tui_annotate(&target_ms, evsel, hbt, browser->opts); - sym_title(ms->sym, ms->map, title, sizeof(title), browser->opts->percent_type); + symbol__tui_annotate(&target_ms, evsel, hbt); + sym_title(ms->sym, ms->map, title, sizeof(title), annotate_opts.percent_type); ui_browser__show_title(&browser->b, title); return true; } @@ -500,7 +496,7 @@ struct disasm_line *annotate_browser__find_offset(struct annotate_browser *brows list_for_each_entry(pos, ¬es->src->source, al.node) { if (pos->al.offset == offset) return pos; - if (!annotation_line__filter(&pos->al, notes)) + if (!annotation_line__filter(&pos->al)) ++*idx; } @@ -544,7 +540,7 @@ struct annotation_line *annotate_browser__find_string(struct annotate_browser *b *idx = browser->b.index; list_for_each_entry_continue(al, ¬es->src->source, node) { - if (annotation_line__filter(al, notes)) + if (annotation_line__filter(al)) continue; ++*idx; @@ -581,7 +577,7 @@ struct annotation_line *annotate_browser__find_string_reverse(struct annotate_br *idx = browser->b.index; list_for_each_entry_continue_reverse(al, ¬es->src->source, node) { - if (annotation_line__filter(al, notes)) + if (annotation_line__filter(al)) continue; --*idx; @@ -659,7 +655,6 @@ bool annotate_browser__continue_search_reverse(struct annotate_browser *browser, static int annotate_browser__show(struct ui_browser *browser, char *title, const char *help) { - struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); struct map_symbol *ms = browser->priv; struct symbol *sym = ms->sym; char symbol_dso[SYM_TITLE_MAX_SIZE]; @@ -667,7 +662,7 @@ static int annotate_browser__show(struct ui_browser *browser, char *title, const if 
(ui_browser__show(browser, title, help) < 0) return -1; - sym_title(sym, ms->map, symbol_dso, sizeof(symbol_dso), ab->opts->percent_type); + sym_title(sym, ms->map, symbol_dso, sizeof(symbol_dso), annotate_opts.percent_type); ui_browser__gotorc_title(browser, 0, 0); ui_browser__set_color(browser, HE_COLORSET_ROOT); @@ -809,7 +804,7 @@ static int annotate_browser__run(struct annotate_browser *browser, annotate_browser__show(&browser->b, title, help); continue; case 'k': - notes->options->show_linenr = !notes->options->show_linenr; + annotate_opts.show_linenr = !annotate_opts.show_linenr; continue; case 'l': annotate_browser__show_full_location (&browser->b); @@ -822,18 +817,18 @@ static int annotate_browser__run(struct annotate_browser *browser, ui_helpline__puts(help); continue; case 'o': - notes->options->use_offset = !notes->options->use_offset; + annotate_opts.use_offset = !annotate_opts.use_offset; annotation__update_column_widths(notes); continue; case 'O': - if (++notes->options->offset_level > ANNOTATION__MAX_OFFSET_LEVEL) - notes->options->offset_level = ANNOTATION__MIN_OFFSET_LEVEL; + if (++annotate_opts.offset_level > ANNOTATION__MAX_OFFSET_LEVEL) + annotate_opts.offset_level = ANNOTATION__MIN_OFFSET_LEVEL; continue; case 'j': - notes->options->jump_arrows = !notes->options->jump_arrows; + annotate_opts.jump_arrows = !annotate_opts.jump_arrows; continue; case 'J': - notes->options->show_nr_jumps = !notes->options->show_nr_jumps; + annotate_opts.show_nr_jumps = !annotate_opts.show_nr_jumps; annotation__update_column_widths(notes); continue; case '/': @@ -860,7 +855,7 @@ show_help: browser->b.height, browser->b.index, browser->b.top_idx, - notes->nr_asm_entries); + notes->src->nr_asm_entries); } continue; case K_ENTER: @@ -884,7 +879,7 @@ show_sup_ins: continue; } case 'P': - map_symbol__annotation_dump(ms, evsel, browser->opts); + map_symbol__annotation_dump(ms, evsel); continue; case 't': if (symbol_conf.show_total_period) { @@ -897,15 +892,15 @@ 
show_sup_ins: annotation__update_column_widths(notes); continue; case 'c': - if (notes->options->show_minmax_cycle) - notes->options->show_minmax_cycle = false; + if (annotate_opts.show_minmax_cycle) + annotate_opts.show_minmax_cycle = false; else - notes->options->show_minmax_cycle = true; + annotate_opts.show_minmax_cycle = true; annotation__update_column_widths(notes); continue; case 'p': case 'b': - switch_percent_type(browser->opts, key == 'b'); + switch_percent_type(&annotate_opts, key == 'b'); hists__scnprintf_title(hists, title, sizeof(title)); annotate_browser__show(&browser->b, title, help); continue; @@ -932,26 +927,24 @@ out: } int map_symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, - struct hist_browser_timer *hbt, - struct annotation_options *opts) + struct hist_browser_timer *hbt) { - return symbol__tui_annotate(ms, evsel, hbt, opts); + return symbol__tui_annotate(ms, evsel, hbt); } int hist_entry__tui_annotate(struct hist_entry *he, struct evsel *evsel, - struct hist_browser_timer *hbt, - struct annotation_options *opts) + struct hist_browser_timer *hbt) { /* reset abort key so that it can get Ctrl-C as a key */ SLang_reset_tty(); SLang_init_tty(0, 0, 0); + SLtty_set_suspend_state(true); - return map_symbol__tui_annotate(&he->ms, evsel, hbt, opts); + return map_symbol__tui_annotate(&he->ms, evsel, hbt); } int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, - struct hist_browser_timer *hbt, - struct annotation_options *opts) + struct hist_browser_timer *hbt) { struct symbol *sym = ms->sym; struct annotation *notes = symbol__annotation(sym); @@ -965,7 +958,6 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, .priv = ms, .use_navkeypressed = true, }, - .opts = opts, }; struct dso *dso; int ret = -1, err; @@ -979,7 +971,7 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, return -1; if (not_annotated) { - err = symbol__annotate2(ms, evsel, opts, &browser.arch); + err = 
symbol__annotate2(ms, evsel, &browser.arch); if (err) { char msg[BUFSIZ]; dso->annotate_warned = true; @@ -991,12 +983,12 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, ui_helpline__push("Press ESC to exit"); - browser.b.width = notes->max_line_len; - browser.b.nr_entries = notes->nr_entries; + browser.b.width = notes->src->max_line_len; + browser.b.nr_entries = notes->src->nr_entries; browser.b.entries = &notes->src->source, browser.b.width += 18; /* Percentage */ - if (notes->options->hide_src_code) + if (annotate_opts.hide_src_code) ui_browser__init_asm_mode(&browser.b); ret = annotate_browser__run(&browser, evsel, hbt); @@ -1006,6 +998,6 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, out_free_offsets: if(not_annotated) - zfree(&notes->offsets); + zfree(&notes->src->offsets); return ret; } diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index f4812b226818122b92ad5cdce9ba36071585b0db..0c02b3a8e121ffaaaa17647237ed5ab2c9ad67d0 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2250,8 +2250,7 @@ struct hist_browser *hist_browser__new(struct hists *hists) static struct hist_browser * perf_evsel_browser__new(struct evsel *evsel, struct hist_browser_timer *hbt, - struct perf_env *env, - struct annotation_options *annotation_opts) + struct perf_env *env) { struct hist_browser *browser = hist_browser__new(evsel__hists(evsel)); @@ -2259,7 +2258,6 @@ perf_evsel_browser__new(struct evsel *evsel, browser->hbt = hbt; browser->env = env; browser->title = hists_browser__scnprintf_title; - browser->annotation_opts = annotation_opts; } return browser; } @@ -2432,8 +2430,8 @@ do_annotate(struct hist_browser *browser, struct popup_action *act) struct hist_entry *he; int err; - if (!browser->annotation_opts->objdump_path && - perf_env__lookup_objdump(browser->env, &browser->annotation_opts->objdump_path)) + if (!annotate_opts.objdump_path && +
perf_env__lookup_objdump(browser->env, &annotate_opts.objdump_path)) return 0; notes = symbol__annotation(act->ms.sym); @@ -2445,8 +2443,7 @@ do_annotate(struct hist_browser *browser, struct popup_action *act) else evsel = hists_to_evsel(browser->hists); - err = map_symbol__tui_annotate(&act->ms, evsel, browser->hbt, - browser->annotation_opts); + err = map_symbol__tui_annotate(&act->ms, evsel, browser->hbt); he = hist_browser__selected_entry(browser); /* * offer option to annotate the other branch source or target @@ -2943,11 +2940,10 @@ next: static int evsel__hists_browse(struct evsel *evsel, int nr_events, const char *helpline, bool left_exits, struct hist_browser_timer *hbt, float min_pcnt, - struct perf_env *env, bool warn_lost_event, - struct annotation_options *annotation_opts) + struct perf_env *env, bool warn_lost_event) { struct hists *hists = evsel__hists(evsel); - struct hist_browser *browser = perf_evsel_browser__new(evsel, hbt, env, annotation_opts); + struct hist_browser *browser = perf_evsel_browser__new(evsel, hbt, env); struct branch_info *bi = NULL; #define MAX_OPTIONS 16 char *options[MAX_OPTIONS]; @@ -3004,6 +3000,7 @@ static int evsel__hists_browse(struct evsel *evsel, int nr_events, const char *h /* reset abort key so that it can get Ctrl-C as a key */ SLang_reset_tty(); SLang_init_tty(0, 0, 0); + SLtty_set_suspend_state(true); if (min_pcnt) browser->min_pcnt = min_pcnt; @@ -3398,7 +3395,6 @@ out: struct evsel_menu { struct ui_browser b; struct evsel *selection; - struct annotation_options *annotation_opts; bool lost_events, lost_events_warned; float min_pcnt; struct perf_env *env; @@ -3499,8 +3495,7 @@ browse_hists: hbt->timer(hbt->arg); key = evsel__hists_browse(pos, nr_events, help, true, hbt, menu->min_pcnt, menu->env, - warn_lost_event, - menu->annotation_opts); + warn_lost_event); ui_browser__show_title(&menu->b, title); switch (key) { case K_TAB: @@ -3557,7 +3552,7 @@ static bool filter_group_entries(struct ui_browser *browser 
__maybe_unused, static int __evlist__tui_browse_hists(struct evlist *evlist, int nr_entries, const char *help, struct hist_browser_timer *hbt, float min_pcnt, struct perf_env *env, - bool warn_lost_event, struct annotation_options *annotation_opts) + bool warn_lost_event) { struct evsel *pos; struct evsel_menu menu = { @@ -3572,7 +3567,6 @@ static int __evlist__tui_browse_hists(struct evlist *evlist, int nr_entries, con }, .min_pcnt = min_pcnt, .env = env, - .annotation_opts = annotation_opts, }; ui_helpline__push("Press ESC to exit"); @@ -3607,8 +3601,7 @@ static bool evlist__single_entry(struct evlist *evlist) } int evlist__tui_browse_hists(struct evlist *evlist, const char *help, struct hist_browser_timer *hbt, - float min_pcnt, struct perf_env *env, bool warn_lost_event, - struct annotation_options *annotation_opts) + float min_pcnt, struct perf_env *env, bool warn_lost_event) { int nr_entries = evlist->core.nr_entries; @@ -3617,7 +3610,7 @@ single_entry: { struct evsel *first = evlist__first(evlist); return evsel__hists_browse(first, nr_entries, help, false, hbt, min_pcnt, - env, warn_lost_event, annotation_opts); + env, warn_lost_event); } } @@ -3635,7 +3628,7 @@ single_entry: { } return __evlist__tui_browse_hists(evlist, nr_entries, help, hbt, min_pcnt, env, - warn_lost_event, annotation_opts); + warn_lost_event); } static int block_hists_browser__title(struct hist_browser *browser, char *bf, @@ -3654,8 +3647,7 @@ static int block_hists_browser__title(struct hist_browser *browser, char *bf, } int block_hists_tui_browse(struct block_hist *bh, struct evsel *evsel, - float min_percent, struct perf_env *env, - struct annotation_options *annotation_opts) + float min_percent, struct perf_env *env) { struct hists *hists = &bh->block_hists; struct hist_browser *browser; @@ -3672,11 +3664,11 @@ int block_hists_tui_browse(struct block_hist *bh, struct evsel *evsel, browser->title = block_hists_browser__title; browser->min_pcnt = min_percent; browser->env = env; - 
browser->annotation_opts = annotation_opts; /* reset abort key so that it can get Ctrl-C as a key */ SLang_reset_tty(); SLang_init_tty(0, 0, 0); + SLtty_set_suspend_state(true); memset(&action, 0, sizeof(action)); diff --git a/tools/perf/ui/browsers/hists.h b/tools/perf/ui/browsers/hists.h index 1e938d9ffa5ee26177152840acdf73072db6c289..de46f6c56b0ef0d798c106598446f044670f2994 100644 --- a/tools/perf/ui/browsers/hists.h +++ b/tools/perf/ui/browsers/hists.h @@ -4,7 +4,6 @@ #include "ui/browser.h" -struct annotation_options; struct evsel; struct hist_browser { @@ -15,7 +14,6 @@ struct hist_browser { struct hist_browser_timer *hbt; struct pstack *pstack; struct perf_env *env; - struct annotation_options *annotation_opts; struct evsel *block_evsel; int print_seq; bool show_dso; diff --git a/tools/perf/ui/browsers/scripts.c b/tools/perf/ui/browsers/scripts.c index 47d2c7a8cbe13cba1a3f9d46fd3c720cf0d2149c..50d45054ed6c1b435faf5cb4634ceae6eea03491 100644 --- a/tools/perf/ui/browsers/scripts.c +++ b/tools/perf/ui/browsers/scripts.c @@ -166,6 +166,7 @@ void run_script(char *cmd) printf("\033[c\033[H\033[J"); fflush(stdout); SLang_init_tty(0, 0, 0); + SLtty_set_suspend_state(true); SLsmg_refresh(); } diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index 2effac77ca8c6742fcd8a0287f0ccf54235c8d56..394861245fd3e48ff1cc43ae14b97dd2213dc64e 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -162,7 +162,6 @@ static int perf_gtk__annotate_symbol(GtkWidget *window, struct map_symbol *ms, } static int symbol__gtk_annotate(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *options, struct hist_browser_timer *hbt) { struct dso *dso = map__dso(ms->map); @@ -176,7 +175,7 @@ static int symbol__gtk_annotate(struct map_symbol *ms, struct evsel *evsel, if (dso->annotate_warned) return -1; - err = symbol__annotate(ms, evsel, options, NULL); + err = symbol__annotate(ms, evsel, NULL); if (err) { char msg[BUFSIZ]; 
dso->annotate_warned = true; @@ -244,10 +243,9 @@ static int symbol__gtk_annotate(struct map_symbol *ms, struct evsel *evsel, int hist_entry__gtk_annotate(struct hist_entry *he, struct evsel *evsel, - struct annotation_options *options, struct hist_browser_timer *hbt) { - return symbol__gtk_annotate(&he->ms, evsel, options, hbt); + return symbol__gtk_annotate(&he->ms, evsel, hbt); } void perf_gtk__show_annotations(void) diff --git a/tools/perf/ui/gtk/gtk.h b/tools/perf/ui/gtk/gtk.h index 1e84dceb52671385696db95e603b008e0d19efda..a2b497f03fd6e478f11136e5ef21e987f4aeb89d 100644 --- a/tools/perf/ui/gtk/gtk.h +++ b/tools/perf/ui/gtk/gtk.h @@ -56,13 +56,11 @@ struct evsel; struct evlist; struct hist_entry; struct hist_browser_timer; -struct annotation_options; int evlist__gtk_browse_hists(struct evlist *evlist, const char *help, struct hist_browser_timer *hbt, float min_pcnt); int hist_entry__gtk_annotate(struct hist_entry *he, struct evsel *evsel, - struct annotation_options *options, struct hist_browser_timer *hbt); void perf_gtk__show_annotations(void); diff --git a/tools/perf/ui/tui/setup.c b/tools/perf/ui/tui/setup.c index 605d9e175ea73b662a51c9fe0257a073fcfaf199..16c6eff4d24116b0fde68f60319330f1d4a0f1ac 100644 --- a/tools/perf/ui/tui/setup.c +++ b/tools/perf/ui/tui/setup.c @@ -2,12 +2,14 @@ #include #include #include +#include #include #include #ifdef HAVE_BACKTRACE_SUPPORT #include #endif +#include "../../util/color.h" #include "../../util/debug.h" #include "../browser.h" #include "../helpline.h" @@ -121,6 +123,23 @@ static void ui__signal(int sig) exit(0); } +static void ui__sigcont(int sig) +{ + static struct termios tty; + + if (sig == SIGTSTP) { + while (tcgetattr(SLang_TT_Read_FD, &tty) == -1 && errno == EINTR) + ; + while (write(SLang_TT_Read_FD, PERF_COLOR_RESET, sizeof(PERF_COLOR_RESET) - 1) == -1 && errno == EINTR) + ; + raise(SIGSTOP); + } else { + while (tcsetattr(SLang_TT_Read_FD, TCSADRAIN, &tty) == -1 && errno == EINTR) + ; + raise(SIGWINCH); + } +} 
+ int ui__init(void) { int err; @@ -135,6 +154,7 @@ int ui__init(void) err = SLang_init_tty(-1, 0, 0); if (err < 0) goto out; + SLtty_set_suspend_state(true); err = SLkp_init(); if (err < 0) { @@ -149,6 +169,8 @@ int ui__init(void) signal(SIGINT, ui__signal); signal(SIGQUIT, ui__signal); signal(SIGTERM, ui__signal); + signal(SIGTSTP, ui__sigcont); + signal(SIGCONT, ui__sigcont); perf_error__register(&perf_tui_eops); diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 988473bf907aee74f9863fe52bb59a5f3b4dd387..8027f450fa3e489e04769f42a146e4438350dbbb 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -195,6 +195,8 @@ endif perf-$(CONFIG_DWARF) += probe-finder.o perf-$(CONFIG_DWARF) += dwarf-aux.o perf-$(CONFIG_DWARF) += dwarf-regs.o +perf-$(CONFIG_DWARF) += debuginfo.o +perf-$(CONFIG_DWARF) += annotate-data.o perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind-local.o diff --git a/tools/perf/util/annotate-data.c b/tools/perf/util/annotate-data.c new file mode 100644 index 0000000000000000000000000000000000000000..f22b4f18271c96e406ce5109648e215c08372b86 --- /dev/null +++ b/tools/perf/util/annotate-data.c @@ -0,0 +1,405 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Convert sample address to data type using DWARF debug info. + * + * Written by Namhyung Kim + */ + +#include +#include +#include + +#include "annotate-data.h" +#include "debuginfo.h" +#include "debug.h" +#include "dso.h" +#include "evsel.h" +#include "evlist.h" +#include "map.h" +#include "map_symbol.h" +#include "strbuf.h" +#include "symbol.h" +#include "symbol_conf.h" + +/* + * Compare type name and size to maintain them in a tree. + * I'm not sure if DWARF would have information of a single type in many + * different places (compilation units). If not, it could compare the + * offset of the type entry in the .debug_info section. 
+ */ +static int data_type_cmp(const void *_key, const struct rb_node *node) +{ + const struct annotated_data_type *key = _key; + struct annotated_data_type *type; + + type = rb_entry(node, struct annotated_data_type, node); + + if (key->self.size != type->self.size) + return key->self.size - type->self.size; + return strcmp(key->self.type_name, type->self.type_name); +} + +static bool data_type_less(struct rb_node *node_a, const struct rb_node *node_b) +{ + struct annotated_data_type *a, *b; + + a = rb_entry(node_a, struct annotated_data_type, node); + b = rb_entry(node_b, struct annotated_data_type, node); + + if (a->self.size != b->self.size) + return a->self.size < b->self.size; + return strcmp(a->self.type_name, b->self.type_name) < 0; +} + +/* Recursively add new members for struct/union */ +static int __add_member_cb(Dwarf_Die *die, void *arg) +{ + struct annotated_member *parent = arg; + struct annotated_member *member; + Dwarf_Die member_type, die_mem; + Dwarf_Word size, loc; + Dwarf_Attribute attr; + struct strbuf sb; + int tag; + + if (dwarf_tag(die) != DW_TAG_member) + return DIE_FIND_CB_SIBLING; + + member = zalloc(sizeof(*member)); + if (member == NULL) + return DIE_FIND_CB_END; + + strbuf_init(&sb, 32); + die_get_typename(die, &sb); + + die_get_real_type(die, &member_type); + if (dwarf_aggregate_size(&member_type, &size) < 0) + size = 0; + + if (!dwarf_attr_integrate(die, DW_AT_data_member_location, &attr)) + loc = 0; + else + dwarf_formudata(&attr, &loc); + + member->type_name = strbuf_detach(&sb, NULL); + /* member->var_name can be NULL */ + if (dwarf_diename(die)) + member->var_name = strdup(dwarf_diename(die)); + member->size = size; + member->offset = loc + parent->offset; + INIT_LIST_HEAD(&member->children); + list_add_tail(&member->node, &parent->children); + + tag = dwarf_tag(&member_type); + switch (tag) { + case DW_TAG_structure_type: + case DW_TAG_union_type: + die_find_child(&member_type, __add_member_cb, member, &die_mem); + break; + 
default: + break; + } + return DIE_FIND_CB_SIBLING; +} + +static void add_member_types(struct annotated_data_type *parent, Dwarf_Die *type) +{ + Dwarf_Die die_mem; + + die_find_child(type, __add_member_cb, &parent->self, &die_mem); +} + +static void delete_members(struct annotated_member *member) +{ + struct annotated_member *child, *tmp; + + list_for_each_entry_safe(child, tmp, &member->children, node) { + list_del(&child->node); + delete_members(child); + free(child->type_name); + free(child->var_name); + free(child); + } +} + +static struct annotated_data_type *dso__findnew_data_type(struct dso *dso, + Dwarf_Die *type_die) +{ + struct annotated_data_type *result = NULL; + struct annotated_data_type key; + struct rb_node *node; + struct strbuf sb; + char *type_name; + Dwarf_Word size; + + strbuf_init(&sb, 32); + if (die_get_typename_from_type(type_die, &sb) < 0) + strbuf_add(&sb, "(unknown type)", 14); + type_name = strbuf_detach(&sb, NULL); + dwarf_aggregate_size(type_die, &size); + + /* Check existing nodes in dso->data_types tree */ + key.self.type_name = type_name; + key.self.size = size; + node = rb_find(&key, &dso->data_types, data_type_cmp); + if (node) { + result = rb_entry(node, struct annotated_data_type, node); + free(type_name); + return result; + } + + /* If not, add a new one */ + result = zalloc(sizeof(*result)); + if (result == NULL) { + free(type_name); + return NULL; + } + + result->self.type_name = type_name; + result->self.size = size; + INIT_LIST_HEAD(&result->self.children); + + if (symbol_conf.annotate_data_member) + add_member_types(result, type_die); + + rb_add(&result->node, &dso->data_types, data_type_less); + return result; +} + +static bool find_cu_die(struct debuginfo *di, u64 pc, Dwarf_Die *cu_die) +{ + Dwarf_Off off, next_off; + size_t header_size; + + if (dwarf_addrdie(di->dbg, pc, cu_die) != NULL) + return cu_die; + + /* + * There are some kernels don't have full aranges and contain only a few + * aranges entries. 
Fallback to iterate all CU entries in .debug_info + * in case it's missing. + */ + off = 0; + while (dwarf_nextcu(di->dbg, off, &next_off, &header_size, + NULL, NULL, NULL) == 0) { + if (dwarf_offdie(di->dbg, off + header_size, cu_die) && + dwarf_haspc(cu_die, pc)) + return true; + + off = next_off; + } + return false; +} + +/* The type info will be saved in @type_die */ +static int check_variable(Dwarf_Die *var_die, Dwarf_Die *type_die, int offset) +{ + Dwarf_Word size; + + /* Get the type of the variable */ + if (die_get_real_type(var_die, type_die) == NULL) { + pr_debug("variable has no type\n"); + ann_data_stat.no_typeinfo++; + return -1; + } + + /* + * It expects a pointer type for a memory access. + * Convert to a real type it points to. + */ + if (dwarf_tag(type_die) != DW_TAG_pointer_type || + die_get_real_type(type_die, type_die) == NULL) { + pr_debug("no pointer or no type\n"); + ann_data_stat.no_typeinfo++; + return -1; + } + + /* Get the size of the actual type */ + if (dwarf_aggregate_size(type_die, &size) < 0) { + pr_debug("type size is unknown\n"); + ann_data_stat.invalid_size++; + return -1; + } + + /* Minimal sanity check */ + if ((unsigned)offset >= size) { + pr_debug("offset: %d is bigger than size: %" PRIu64 "\n", offset, size); + ann_data_stat.bad_offset++; + return -1; + } + + return 0; +} + +/* The result will be saved in @type_die */ +static int find_data_type_die(struct debuginfo *di, u64 pc, + int reg, int offset, Dwarf_Die *type_die) +{ + Dwarf_Die cu_die, var_die; + Dwarf_Die *scopes = NULL; + int ret = -1; + int i, nr_scopes; + + /* Get a compile_unit for this address */ + if (!find_cu_die(di, pc, &cu_die)) { + pr_debug("cannot find CU for address %" PRIx64 "\n", pc); + ann_data_stat.no_cuinfo++; + return -1; + } + + /* Get a list of nested scopes - i.e. (inlined) functions and blocks. 
*/ + nr_scopes = die_get_scopes(&cu_die, pc, &scopes); + + /* Search from the inner-most scope to the outer */ + for (i = nr_scopes - 1; i >= 0; i--) { + /* Look up variables/parameters in this scope */ + if (!die_find_variable_by_reg(&scopes[i], pc, reg, &var_die)) + continue; + + /* Found a variable, see if it's correct */ + ret = check_variable(&var_die, type_die, offset); + goto out; + } + if (ret < 0) + ann_data_stat.no_var++; + +out: + free(scopes); + return ret; +} + +/** + * find_data_type - Return a data type at the location + * @ms: map and symbol at the location + * @ip: instruction address of the memory access + * @reg: register that holds the base address + * @offset: offset from the base address + * + * This function searches the debug information of the binary to get the data + * type it accesses. The exact location is expressed by (ip, reg, offset). + * It returns %NULL if not found. + */ +struct annotated_data_type *find_data_type(struct map_symbol *ms, u64 ip, + int reg, int offset) +{ + struct annotated_data_type *result = NULL; + struct dso *dso = map__dso(ms->map); + struct debuginfo *di; + Dwarf_Die type_die; + u64 pc; + + di = debuginfo__new(dso->long_name); + if (di == NULL) { + pr_debug("cannot get the debug info\n"); + return NULL; + } + + /* + * IP is a relative instruction address from the start of the map, as + * it can be randomized/relocated, it needs to translate to PC which is + * a file address for DWARF processing.
+ */ + pc = map__rip_2objdump(ms->map, ip); + if (find_data_type_die(di, pc, reg, offset, &type_die) < 0) + goto out; + + result = dso__findnew_data_type(dso, &type_die); + +out: + debuginfo__delete(di); + return result; +} + +static int alloc_data_type_histograms(struct annotated_data_type *adt, int nr_entries) +{ + int i; + size_t sz = sizeof(struct type_hist); + + sz += sizeof(struct type_hist_entry) * adt->self.size; + + /* Allocate a table of pointers for each event */ + adt->nr_histograms = nr_entries; + adt->histograms = calloc(nr_entries, sizeof(*adt->histograms)); + if (adt->histograms == NULL) + return -ENOMEM; + + /* + * Each histogram is allocated for the whole size of the type. + * TODO: Probably we can move the histogram to members. + */ + for (i = 0; i < nr_entries; i++) { + adt->histograms[i] = zalloc(sz); + if (adt->histograms[i] == NULL) + goto err; + } + return 0; + +err: + while (--i >= 0) + free(adt->histograms[i]); + free(adt->histograms); + return -ENOMEM; +} + +static void delete_data_type_histograms(struct annotated_data_type *adt) +{ + for (int i = 0; i < adt->nr_histograms; i++) + free(adt->histograms[i]); + free(adt->histograms); +} + +void annotated_data_type__tree_delete(struct rb_root *root) +{ + struct annotated_data_type *pos; + + while (!RB_EMPTY_ROOT(root)) { + struct rb_node *node = rb_first(root); + + rb_erase(node, root); + pos = rb_entry(node, struct annotated_data_type, node); + delete_members(&pos->self); + delete_data_type_histograms(pos); + free(pos->self.type_name); + free(pos); + } +} + +/** + * annotated_data_type__update_samples - Update histogram + * @adt: Data type to update + * @evsel: Event to update + * @offset: Offset in the type + * @nr_samples: Number of samples at this offset + * @period: Event count at this offset + * + * This function updates type histogram at @ofs for @evsel. Samples are + * aggregated before calling this function so it can be called with more + * than one samples at a certain offset. 
+ */ +int annotated_data_type__update_samples(struct annotated_data_type *adt, + struct evsel *evsel, int offset, + int nr_samples, u64 period) +{ + struct type_hist *h; + + if (adt == NULL) + return 0; + + if (adt->histograms == NULL) { + int nr = evsel->evlist->core.nr_entries; + + if (alloc_data_type_histograms(adt, nr) < 0) + return -1; + } + + if (offset < 0 || offset >= adt->self.size) + return -1; + + h = adt->histograms[evsel->core.idx]; + + h->nr_samples += nr_samples; + h->addr[offset].nr_samples += nr_samples; + h->period += period; + h->addr[offset].period += period; + return 0; +} diff --git a/tools/perf/util/annotate-data.h b/tools/perf/util/annotate-data.h new file mode 100644 index 0000000000000000000000000000000000000000..8e73096c01d1ab2fd532b0019c1c95d694feb5c8 --- /dev/null +++ b/tools/perf/util/annotate-data.h @@ -0,0 +1,143 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _PERF_ANNOTATE_DATA_H +#define _PERF_ANNOTATE_DATA_H + +#include +#include +#include +#include + +struct evsel; +struct map_symbol; + +/** + * struct annotated_member - Type of member field + * @node: List entry in the parent list + * @children: List head for child nodes + * @type_name: Name of the member type + * @var_name: Name of the member variable + * @offset: Offset from the outer data type + * @size: Size of the member field + * + * This represents a member type in a data type. 
+ */ +struct annotated_member { + struct list_head node; + struct list_head children; + char *type_name; + char *var_name; + int offset; + int size; +}; + +/** + * struct type_hist_entry - Histogram entry per offset + * @nr_samples: Number of samples + * @period: Count of event + */ +struct type_hist_entry { + int nr_samples; + u64 period; +}; + +/** + * struct type_hist - Type histogram for each event + * @nr_samples: Total number of samples in this data type + * @period: Total count of the event in this data type + * @addr: Array of histogram entries + */ +struct type_hist { + u64 nr_samples; + u64 period; + struct type_hist_entry addr[]; +}; + +/** + * struct annotated_data_type - Data type to profile + * @node: RB-tree node for dso->data_types + * @self: Actual type information + * @nr_histograms: Number of histogram entries + * @histograms: An array of pointers to histograms + * + * This represents a data type accessed by samples in the profile data. + */ +struct annotated_data_type { + struct rb_node node; + struct annotated_member self; + int nr_histograms; + struct type_hist **histograms; +}; + +extern struct annotated_data_type unknown_type; + +/** + * struct annotated_data_stat - Debug statistics + * @total: Total number of entries + * @no_sym: No symbol or map found + * @no_insn: Failed to get disasm line + * @no_insn_ops: The instruction has no operands + * @no_mem_ops: The instruction has no memory operands + * @no_reg: Failed to extract a register from the operand + * @no_dbginfo: The binary has no debug information + * @no_cuinfo: Failed to find a compile_unit + * @no_var: Failed to find a matching variable + * @no_typeinfo: Failed to get a type info for the variable + * @invalid_size: Failed to get a size info of the type + * @bad_offset: The access offset is out of the type + */ +struct annotated_data_stat { + int total; + int no_sym; + int no_insn; + int no_insn_ops; + int no_mem_ops; + int no_reg; + int no_dbginfo; + int no_cuinfo; + int no_var; +
int no_typeinfo; + int invalid_size; + int bad_offset; +}; +extern struct annotated_data_stat ann_data_stat; + +#ifdef HAVE_DWARF_SUPPORT + +/* Returns data type at the location (ip, reg, offset) */ +struct annotated_data_type *find_data_type(struct map_symbol *ms, u64 ip, + int reg, int offset); + +/* Update type access histogram at the given offset */ +int annotated_data_type__update_samples(struct annotated_data_type *adt, + struct evsel *evsel, int offset, + int nr_samples, u64 period); + +/* Release all data type information in the tree */ +void annotated_data_type__tree_delete(struct rb_root *root); + +#else /* HAVE_DWARF_SUPPORT */ + +static inline struct annotated_data_type * +find_data_type(struct map_symbol *ms __maybe_unused, u64 ip __maybe_unused, + int reg __maybe_unused, int offset __maybe_unused) +{ + return NULL; +} + +static inline int +annotated_data_type__update_samples(struct annotated_data_type *adt __maybe_unused, + struct evsel *evsel __maybe_unused, + int offset __maybe_unused, + int nr_samples __maybe_unused, + u64 period __maybe_unused) +{ + return -1; +} + +static inline void annotated_data_type__tree_delete(struct rb_root *root __maybe_unused) +{ +} + +#endif /* HAVE_DWARF_SUPPORT */ + +#endif /* _PERF_ANNOTATE_DATA_H */ diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 82956adf99632d742f777f01d4177ce575ddfbd6..9b70ab110ce79f24da580611f1a2098726f1ae12 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -25,12 +25,14 @@ #include "units.h" #include "debug.h" #include "annotate.h" +#include "annotate-data.h" #include "evsel.h" #include "evlist.h" #include "bpf-event.h" #include "bpf-utils.h" #include "block-range.h" #include "string2.h" +#include "dwarf-regs.h" #include "util/event.h" #include "util/sharded_mutex.h" #include "arch/common.h" @@ -57,6 +59,9 @@ #include +/* global annotation options */ +struct annotation_options annotate_opts; + static regex_t file_lineno; static struct ins_ops 
*ins__find(struct arch *arch, const char *name); @@ -85,6 +90,8 @@ struct arch { struct { char comment_char; char skip_functions_char; + char register_char; + char memory_ref_char; } objdump; }; @@ -96,6 +103,10 @@ static struct ins_ops nop_ops; static struct ins_ops lock_ops; static struct ins_ops ret_ops; +/* Data type collection debug statistics */ +struct annotated_data_stat ann_data_stat; +LIST_HEAD(ann_insn_stat); + static int arch__grow_instructions(struct arch *arch) { struct ins *new_instructions; @@ -188,6 +199,8 @@ static struct arch architectures[] = { .insn_suffix = "bwlq", .objdump = { .comment_char = '#', + .register_char = '%', + .memory_ref_char = '(', }, }, { @@ -340,10 +353,10 @@ bool ins__is_call(const struct ins *ins) */ static inline const char *validate_comma(const char *c, struct ins_operands *ops) { - if (ops->raw_comment && c > ops->raw_comment) + if (ops->jump.raw_comment && c > ops->jump.raw_comment) return NULL; - if (ops->raw_func_start && c > ops->raw_func_start) + if (ops->jump.raw_func_start && c > ops->jump.raw_func_start) return NULL; return c; @@ -359,8 +372,8 @@ static int jump__parse(struct arch *arch, struct ins_operands *ops, struct map_s const char *c = strchr(ops->raw, ','); u64 start, end; - ops->raw_comment = strchr(ops->raw, arch->objdump.comment_char); - ops->raw_func_start = strchr(ops->raw, '<'); + ops->jump.raw_comment = strchr(ops->raw, arch->objdump.comment_char); + ops->jump.raw_func_start = strchr(ops->raw, '<'); c = validate_comma(c, ops); @@ -462,7 +475,16 @@ static int jump__scnprintf(struct ins *ins, char *bf, size_t size, ops->target.offset); } +static void jump__delete(struct ins_operands *ops __maybe_unused) +{ + /* + * The ops->jump.raw_comment and ops->jump.raw_func_start belong to the + * raw string, don't free them. 
+ */ +} + static struct ins_ops jump_ops = { + .free = jump__delete, .parse = jump__parse, .scnprintf = jump__scnprintf, }; @@ -557,6 +579,34 @@ static struct ins_ops lock_ops = { .scnprintf = lock__scnprintf, }; +/* + * Check if the operand has more than one registers like x86 SIB addressing: + * 0x1234(%rax, %rbx, 8) + * + * But it doesn't care segment selectors like %gs:0x5678(%rcx), so just check + * the input string after 'memory_ref_char' if exists. + */ +static bool check_multi_regs(struct arch *arch, const char *op) +{ + int count = 0; + + if (arch->objdump.register_char == 0) + return false; + + if (arch->objdump.memory_ref_char) { + op = strchr(op, arch->objdump.memory_ref_char); + if (op == NULL) + return false; + } + + while ((op = strchr(op, arch->objdump.register_char)) != NULL) { + count++; + op++; + } + + return count > 1; +} + static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms __maybe_unused) { char *s = strchr(ops->raw, ','), *target, *comment, prev; @@ -584,6 +634,8 @@ static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map_sy if (ops->source.raw == NULL) return -1; + ops->source.multi_regs = check_multi_regs(arch, ops->source.raw); + target = skip_spaces(++s); comment = strchr(s, arch->objdump.comment_char); @@ -604,6 +656,8 @@ static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map_sy if (ops->target.raw == NULL) goto out_free_source; + ops->target.multi_regs = check_multi_regs(arch, ops->target.raw); + if (comment == NULL) return 0; @@ -795,6 +849,11 @@ static struct arch *arch__find(const char *name) return bsearch(name, architectures, nmemb, sizeof(struct arch), arch__key_cmp); } +bool arch__is(struct arch *arch, const char *name) +{ + return !strcmp(arch->name, name); +} + static struct annotated_source *annotated_source__new(void) { struct annotated_source *src = zalloc(sizeof(*src)); @@ -810,7 +869,6 @@ static __maybe_unused void 
annotated_source__delete(struct annotated_source *src if (src == NULL) return; zfree(&src->histograms); - zfree(&src->cycles_hist); free(src); } @@ -845,18 +903,6 @@ static int annotated_source__alloc_histograms(struct annotated_source *src, return src->histograms ? 0 : -1; } -/* The cycles histogram is lazily allocated. */ -static int symbol__alloc_hist_cycles(struct symbol *sym) -{ - struct annotation *notes = symbol__annotation(sym); - const size_t size = symbol__size(sym); - - notes->src->cycles_hist = calloc(size, sizeof(struct cyc_hist)); - if (notes->src->cycles_hist == NULL) - return -1; - return 0; -} - void symbol__annotate_zero_histograms(struct symbol *sym) { struct annotation *notes = symbol__annotation(sym); @@ -865,9 +911,10 @@ void symbol__annotate_zero_histograms(struct symbol *sym) if (notes->src != NULL) { memset(notes->src->histograms, 0, notes->src->nr_histograms * notes->src->sizeof_sym_hist); - if (notes->src->cycles_hist) - memset(notes->src->cycles_hist, 0, - symbol__size(sym) * sizeof(struct cyc_hist)); + } + if (notes->branch && notes->branch->cycles_hist) { + memset(notes->branch->cycles_hist, 0, + symbol__size(sym) * sizeof(struct cyc_hist)); } annotation__unlock(notes); } @@ -958,23 +1005,33 @@ static int __symbol__inc_addr_samples(struct map_symbol *ms, return 0; } +struct annotated_branch *annotation__get_branch(struct annotation *notes) +{ + if (notes == NULL) + return NULL; + + if (notes->branch == NULL) + notes->branch = zalloc(sizeof(*notes->branch)); + + return notes->branch; +} + static struct cyc_hist *symbol__cycles_hist(struct symbol *sym) { struct annotation *notes = symbol__annotation(sym); + struct annotated_branch *branch; - if (notes->src == NULL) { - notes->src = annotated_source__new(); - if (notes->src == NULL) - return NULL; - goto alloc_cycles_hist; - } + branch = annotation__get_branch(notes); + if (branch == NULL) + return NULL; + + if (branch->cycles_hist == NULL) { + const size_t size = symbol__size(sym); - if 
(!notes->src->cycles_hist) { -alloc_cycles_hist: - symbol__alloc_hist_cycles(sym); + branch->cycles_hist = calloc(size, sizeof(struct cyc_hist)); } - return notes->src->cycles_hist; + return branch->cycles_hist; } struct annotated_source *symbol__hists(struct symbol *sym, int nr_hists) @@ -1077,12 +1134,20 @@ static unsigned annotation__count_insn(struct annotation *notes, u64 start, u64 u64 offset; for (offset = start; offset <= end; offset++) { - if (notes->offsets[offset]) + if (notes->src->offsets[offset]) n_insn++; } return n_insn; } +static void annotated_branch__delete(struct annotated_branch *branch) +{ + if (branch) { + zfree(&branch->cycles_hist); + free(branch); + } +} + static void annotation__count_and_fill(struct annotation *notes, u64 start, u64 end, struct cyc_hist *ch) { unsigned n_insn; @@ -1091,6 +1156,7 @@ static void annotation__count_and_fill(struct annotation *notes, u64 start, u64 n_insn = annotation__count_insn(notes, start, end); if (n_insn && ch->num && ch->cycles) { + struct annotated_branch *branch; float ipc = n_insn / ((double)ch->cycles / (double)ch->num); /* Hide data when there are too many overlaps. 
*/ @@ -1098,54 +1164,76 @@ static void annotation__count_and_fill(struct annotation *notes, u64 start, u64 return; for (offset = start; offset <= end; offset++) { - struct annotation_line *al = notes->offsets[offset]; + struct annotation_line *al = notes->src->offsets[offset]; - if (al && al->ipc == 0.0) { - al->ipc = ipc; + if (al && al->cycles && al->cycles->ipc == 0.0) { + al->cycles->ipc = ipc; cover_insn++; } } - if (cover_insn) { - notes->hit_cycles += ch->cycles; - notes->hit_insn += n_insn * ch->num; - notes->cover_insn += cover_insn; + branch = annotation__get_branch(notes); + if (cover_insn && branch) { + branch->hit_cycles += ch->cycles; + branch->hit_insn += n_insn * ch->num; + branch->cover_insn += cover_insn; } } } -void annotation__compute_ipc(struct annotation *notes, size_t size) +static int annotation__compute_ipc(struct annotation *notes, size_t size) { + int err = 0; s64 offset; - if (!notes->src || !notes->src->cycles_hist) - return; + if (!notes->branch || !notes->branch->cycles_hist) + return 0; - notes->total_insn = annotation__count_insn(notes, 0, size - 1); - notes->hit_cycles = 0; - notes->hit_insn = 0; - notes->cover_insn = 0; + notes->branch->total_insn = annotation__count_insn(notes, 0, size - 1); + notes->branch->hit_cycles = 0; + notes->branch->hit_insn = 0; + notes->branch->cover_insn = 0; annotation__lock(notes); for (offset = size - 1; offset >= 0; --offset) { struct cyc_hist *ch; - ch = &notes->src->cycles_hist[offset]; + ch = &notes->branch->cycles_hist[offset]; if (ch && ch->cycles) { struct annotation_line *al; + al = notes->src->offsets[offset]; + if (al && al->cycles == NULL) { + al->cycles = zalloc(sizeof(*al->cycles)); + if (al->cycles == NULL) { + err = ENOMEM; + break; + } + } if (ch->have_start) annotation__count_and_fill(notes, ch->start, offset, ch); - al = notes->offsets[offset]; if (al && ch->num_aggr) { - al->cycles = ch->cycles_aggr / ch->num_aggr; - al->cycles_max = ch->cycles_max; - al->cycles_min = ch->cycles_min; +
al->cycles->avg = ch->cycles_aggr / ch->num_aggr; + al->cycles->max = ch->cycles_max; + al->cycles->min = ch->cycles_min; + } + } + } + + if (err) { + while (++offset < (s64)size) { + struct cyc_hist *ch = &notes->branch->cycles_hist[offset]; + + if (ch && ch->cycles) { + struct annotation_line *al = notes->src->offsets[offset]; + if (al) + zfree(&al->cycles); } - notes->have_cycles = true; } } + annotation__unlock(notes); + return 0; } int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample, @@ -1225,6 +1313,7 @@ static void annotation_line__exit(struct annotation_line *al) { zfree_srcline(&al->path); zfree(&al->line); + zfree(&al->cycles); } static size_t disasm_line_size(int nr) @@ -1299,6 +1388,7 @@ int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool r void annotation__exit(struct annotation *notes) { annotated_source__delete(notes->src); + annotated_branch__delete(notes->branch); } static struct sharded_mutex *sharded_mutex; @@ -1817,7 +1907,6 @@ static int symbol__disassemble_bpf(struct symbol *sym, struct annotate_args *args) { struct annotation *notes = symbol__annotation(sym); - struct annotation_options *opts = args->options; struct bpf_prog_linfo *prog_linfo = NULL; struct bpf_prog_info_node *info_node; int len = sym->end - sym->start; @@ -1927,7 +2016,7 @@ static int symbol__disassemble_bpf(struct symbol *sym, prev_buf_size = buf_size; fflush(s); - if (!opts->hide_src_code && srcline) { + if (!annotate_opts.hide_src_code && srcline) { args->offset = -1; args->line = strdup(srcline); args->line_nr = 0; @@ -2050,7 +2139,7 @@ static char *expand_tabs(char *line, char **storage, size_t *storage_len) static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) { - struct annotation_options *opts = args->options; + struct annotation_options *opts = &annotate_opts; struct map *map = args->ms.map; struct dso *dso = map__dso(map); char *command; @@ -2113,12 +2202,13 @@ static int
symbol__disassemble(struct symbol *sym, struct annotate_args *args) err = asprintf(&command, "%s %s%s --start-address=0x%016" PRIx64 " --stop-address=0x%016" PRIx64 - " -l -d %s %s %s %c%s%c %s%s -C \"$1\"", + " %s -d %s %s %s %c%s%c %s%s -C \"$1\"", opts->objdump_path ?: "objdump", opts->disassembler_style ? "-M " : "", opts->disassembler_style ?: "", map__rip_2objdump(map, sym->start), map__rip_2objdump(map, sym->end), + opts->show_linenr ? "-l" : "", opts->show_asm_raw ? "" : "--no-show-raw-insn", opts->annotate_src ? "-S" : "", opts->prefix ? "--prefix " : "", @@ -2299,15 +2389,8 @@ void symbol__calc_percent(struct symbol *sym, struct evsel *evsel) annotation__calc_percent(notes, evsel, symbol__size(sym)); } -int symbol__annotate(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *options, struct arch **parch) +static int evsel__get_arch(struct evsel *evsel, struct arch **parch) { - struct symbol *sym = ms->sym; - struct annotation *notes = symbol__annotation(sym); - struct annotate_args args = { - .evsel = evsel, - .options = options, - }; struct perf_env *env = evsel__env(evsel); const char *arch_name = perf_env__arch(env); struct arch *arch; @@ -2316,25 +2399,45 @@ int symbol__annotate(struct map_symbol *ms, struct evsel *evsel, if (!arch_name) return errno; - args.arch = arch = arch__find(arch_name); + *parch = arch = arch__find(arch_name); if (arch == NULL) { pr_err("%s: unsupported arch %s\n", __func__, arch_name); return ENOTSUP; } - if (parch) - *parch = arch; - if (arch->init) { err = arch->init(arch, env ? 
env->cpuid : NULL); if (err) { - pr_err("%s: failed to initialize %s arch priv area\n", __func__, arch->name); + pr_err("%s: failed to initialize %s arch priv area\n", + __func__, arch->name); return err; } } + return 0; +} +int symbol__annotate(struct map_symbol *ms, struct evsel *evsel, + struct arch **parch) +{ + struct symbol *sym = ms->sym; + struct annotation *notes = symbol__annotation(sym); + struct annotate_args args = { + .evsel = evsel, + .options = &annotate_opts, + }; + struct arch *arch = NULL; + int err; + + err = evsel__get_arch(evsel, &arch); + if (err < 0) + return err; + + if (parch) + *parch = arch; + + args.arch = arch; args.ms = *ms; - if (notes->options && notes->options->full_addr) + if (annotate_opts.full_addr) notes->start = map__objdump_2mem(ms->map, ms->sym->start); else notes->start = map__rip_2objdump(ms->map, ms->sym->start); @@ -2342,12 +2445,12 @@ int symbol__annotate(struct map_symbol *ms, struct evsel *evsel, return symbol__disassemble(sym, &args); } -static void insert_source_line(struct rb_root *root, struct annotation_line *al, - struct annotation_options *opts) +static void insert_source_line(struct rb_root *root, struct annotation_line *al) { struct annotation_line *iter; struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; + unsigned int percent_type = annotate_opts.percent_type; int i, ret; while (*p != NULL) { @@ -2358,7 +2461,7 @@ static void insert_source_line(struct rb_root *root, struct annotation_line *al, if (ret == 0) { for (i = 0; i < al->data_nr; i++) { iter->data[i].percent_sum += annotation_data__percent(&al->data[i], - opts->percent_type); + percent_type); } return; } @@ -2371,7 +2474,7 @@ static void insert_source_line(struct rb_root *root, struct annotation_line *al, for (i = 0; i < al->data_nr; i++) { al->data[i].percent_sum = annotation_data__percent(&al->data[i], - opts->percent_type); + percent_type); } rb_link_node(&al->rb_node, parent, p); @@ -2493,8 +2596,7 @@ static int 
annotated_source__addr_fmt_width(struct list_head *lines, u64 start) return 0; } -int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *opts) +int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel) { struct map *map = ms->map; struct symbol *sym = ms->sym; @@ -2505,6 +2607,7 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel, struct annotation *notes = symbol__annotation(sym); struct sym_hist *h = annotation__histogram(notes, evsel->core.idx); struct annotation_line *pos, *queue = NULL; + struct annotation_options *opts = &annotate_opts; u64 start = map__rip_2objdump(map, sym->start); int printed = 2, queue_len = 0, addr_fmt_width; int more = 0; @@ -2633,8 +2736,7 @@ static void FILE__write_graph(void *fp, int graph) fputs(s, fp); } -static int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp, - struct annotation_options *opts) +static int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp) { struct annotation *notes = symbol__annotation(sym); struct annotation_write_ops wops = { @@ -2649,9 +2751,9 @@ static int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp, struct annotation_line *al; list_for_each_entry(al, &notes->src->source, node) { - if (annotation_line__filter(al, notes)) + if (annotation_line__filter(al)) continue; - annotation_line__write(al, notes, &wops, opts); + annotation_line__write(al, notes, &wops); fputc('\n', fp); wops.first_line = false; } @@ -2659,8 +2761,7 @@ static int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp, return 0; } -int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *opts) +int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel) { const char *ev_name = evsel__name(evsel); char buf[1024]; @@ -2682,7 +2783,7 @@ int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel, fprintf(fp, "%s() %s\nEvent: %s\n\n", ms->sym->name,
map__dso(ms->map)->long_name, ev_name); - symbol__annotate_fprintf2(ms->sym, fp, opts); + symbol__annotate_fprintf2(ms->sym, fp); fclose(fp); err = 0; @@ -2769,7 +2870,7 @@ void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym) return; for (offset = 0; offset < size; ++offset) { - struct annotation_line *al = notes->offsets[offset]; + struct annotation_line *al = notes->src->offsets[offset]; struct disasm_line *dl; dl = disasm_line(al); @@ -2777,7 +2878,7 @@ void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym) if (!disasm_line__is_valid_local_jump(dl, sym)) continue; - al = notes->offsets[dl->ops.target.offset]; + al = notes->src->offsets[dl->ops.target.offset]; /* * FIXME: Oops, no jump target? Buggy disassembler? Or do we @@ -2794,19 +2895,20 @@ void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym) void annotation__set_offsets(struct annotation *notes, s64 size) { struct annotation_line *al; + struct annotated_source *src = notes->src; - notes->max_line_len = 0; - notes->nr_entries = 0; - notes->nr_asm_entries = 0; + src->max_line_len = 0; + src->nr_entries = 0; + src->nr_asm_entries = 0; - list_for_each_entry(al, &notes->src->source, node) { + list_for_each_entry(al, &src->source, node) { size_t line_len = strlen(al->line); - if (notes->max_line_len < line_len) - notes->max_line_len = line_len; - al->idx = notes->nr_entries++; + if (src->max_line_len < line_len) + src->max_line_len = line_len; + al->idx = src->nr_entries++; if (al->offset != -1) { - al->idx_asm = notes->nr_asm_entries++; + al->idx_asm = src->nr_asm_entries++; /* * FIXME: short term bandaid to cope with assembly * routines that comes with labels in the same column @@ -2815,7 +2917,7 @@ void annotation__set_offsets(struct annotation *notes, s64 size) * E.g.
copy_user_generic_unrolled */ if (al->offset < size) - notes->offsets[al->offset] = al; + notes->src->offsets[al->offset] = al; } else al->idx_asm = -1; } @@ -2858,24 +2960,24 @@ void annotation__init_column_widths(struct annotation *notes, struct symbol *sym void annotation__update_column_widths(struct annotation *notes) { - if (notes->options->use_offset) + if (annotate_opts.use_offset) notes->widths.target = notes->widths.min_addr; - else if (notes->options->full_addr) + else if (annotate_opts.full_addr) notes->widths.target = BITS_PER_LONG / 4; else notes->widths.target = notes->widths.max_addr; notes->widths.addr = notes->widths.target; - if (notes->options->show_nr_jumps) + if (annotate_opts.show_nr_jumps) notes->widths.addr += notes->widths.jumps + 1; } void annotation__toggle_full_addr(struct annotation *notes, struct map_symbol *ms) { - notes->options->full_addr = !notes->options->full_addr; + annotate_opts.full_addr = !annotate_opts.full_addr; - if (notes->options->full_addr) + if (annotate_opts.full_addr) notes->start = map__objdump_2mem(ms->map, ms->sym->start); else notes->start = map__rip_2objdump(ms->map, ms->sym->start); @@ -2884,8 +2986,7 @@ void annotation__toggle_full_addr(struct annotation *notes, struct map_symbol *m } static void annotation__calc_lines(struct annotation *notes, struct map *map, - struct rb_root *root, - struct annotation_options *opts) + struct rb_root *root) { struct annotation_line *al; struct rb_root tmp_root = RB_ROOT; @@ -2898,7 +2999,7 @@ static void annotation__calc_lines(struct annotation *notes, struct map *map, double percent; percent = annotation_data__percent(&al->data[i], - opts->percent_type); + annotate_opts.percent_type); if (percent > percent_max) percent_max = percent; @@ -2909,22 +3010,20 @@ static void annotation__calc_lines(struct annotation *notes, struct map *map, al->path = get_srcline(map__dso(map), notes->start + al->offset, NULL, false, true, notes->start + al->offset); - 
insert_source_line(&tmp_root, al, opts); + insert_source_line(&tmp_root, al); } resort_source_line(root, &tmp_root); } -static void symbol__calc_lines(struct map_symbol *ms, struct rb_root *root, - struct annotation_options *opts) +static void symbol__calc_lines(struct map_symbol *ms, struct rb_root *root) { struct annotation *notes = symbol__annotation(ms->sym); - annotation__calc_lines(notes, ms->map, root, opts); + annotation__calc_lines(notes, ms->map, root); } -int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *opts) +int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel) { struct dso *dso = map__dso(ms->map); struct symbol *sym = ms->sym; @@ -2933,7 +3032,7 @@ int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel, char buf[1024]; int err; - err = symbol__annotate2(ms, evsel, opts, NULL); + err = symbol__annotate2(ms, evsel, NULL); if (err) { char msg[BUFSIZ]; @@ -2943,31 +3042,31 @@ int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel, return -1; } - if (opts->print_lines) { - srcline_full_filename = opts->full_path; - symbol__calc_lines(ms, &source_line, opts); + if (annotate_opts.print_lines) { + srcline_full_filename = annotate_opts.full_path; + symbol__calc_lines(ms, &source_line); print_summary(&source_line, dso->long_name); } hists__scnprintf_title(hists, buf, sizeof(buf)); fprintf(stdout, "%s, [percent: %s]\n%s() %s\n", - buf, percent_type_str(opts->percent_type), sym->name, dso->long_name); - symbol__annotate_fprintf2(sym, stdout, opts); + buf, percent_type_str(annotate_opts.percent_type), sym->name, + dso->long_name); + symbol__annotate_fprintf2(sym, stdout); annotated_source__purge(symbol__annotation(sym)->src); return 0; } -int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *opts) +int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel) { struct dso *dso = map__dso(ms->map); struct symbol *sym = 
ms->sym; struct rb_root source_line = RB_ROOT; int err; - err = symbol__annotate(ms, evsel, opts, NULL); + err = symbol__annotate(ms, evsel, NULL); if (err) { char msg[BUFSIZ]; @@ -2979,13 +3078,13 @@ int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel, symbol__calc_percent(sym, evsel); - if (opts->print_lines) { - srcline_full_filename = opts->full_path; - symbol__calc_lines(ms, &source_line, opts); + if (annotate_opts.print_lines) { + srcline_full_filename = annotate_opts.full_path; + symbol__calc_lines(ms, &source_line); print_summary(&source_line, dso->long_name); } - symbol__annotate_printf(ms, evsel, opts); + symbol__annotate_printf(ms, evsel); annotated_source__purge(symbol__annotation(sym)->src); @@ -3046,19 +3145,20 @@ call_like: obj__printf(obj, " "); } - disasm_line__scnprintf(dl, bf, size, !notes->options->use_offset, notes->widths.max_ins_name); + disasm_line__scnprintf(dl, bf, size, !annotate_opts.use_offset, notes->widths.max_ins_name); } static void ipc_coverage_string(char *bf, int size, struct annotation *notes) { double ipc = 0.0, coverage = 0.0; + struct annotated_branch *branch = annotation__get_branch(notes); - if (notes->hit_cycles) - ipc = notes->hit_insn / ((double)notes->hit_cycles); + if (branch && branch->hit_cycles) + ipc = branch->hit_insn / ((double)branch->hit_cycles); - if (notes->total_insn) { - coverage = notes->cover_insn * 100.0 / - ((double)notes->total_insn); + if (branch && branch->total_insn) { + coverage = branch->cover_insn * 100.0 / + ((double)branch->total_insn); } scnprintf(bf, size, "(Average IPC: %.2f, IPC Coverage: %.1f%%)", @@ -3083,8 +3183,8 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati int printed; if (first_line && (al->offset == -1 || percent_max == 0.0)) { - if (notes->have_cycles) { - if (al->ipc == 0.0 && al->cycles == 0) + if (notes->branch && al->cycles) { + if (al->cycles->ipc == 0.0 && al->cycles->avg == 0) show_title = true; } else show_title = true; 
@@ -3120,18 +3220,18 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati } } - if (notes->have_cycles) { - if (al->ipc) - obj__printf(obj, "%*.2f ", ANNOTATION__IPC_WIDTH - 1, al->ipc); + if (notes->branch) { + if (al->cycles && al->cycles->ipc) + obj__printf(obj, "%*.2f ", ANNOTATION__IPC_WIDTH - 1, al->cycles->ipc); else if (!show_title) obj__printf(obj, "%*s", ANNOTATION__IPC_WIDTH, " "); else obj__printf(obj, "%*s ", ANNOTATION__IPC_WIDTH - 1, "IPC"); - if (!notes->options->show_minmax_cycle) { - if (al->cycles) + if (!annotate_opts.show_minmax_cycle) { + if (al->cycles && al->cycles->avg) obj__printf(obj, "%*" PRIu64 " ", - ANNOTATION__CYCLES_WIDTH - 1, al->cycles); + ANNOTATION__CYCLES_WIDTH - 1, al->cycles->avg); else if (!show_title) obj__printf(obj, "%*s", ANNOTATION__CYCLES_WIDTH, " "); @@ -3145,8 +3245,8 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati scnprintf(str, sizeof(str), "%" PRIu64 "(%" PRIu64 "/%" PRIu64 ")", - al->cycles, al->cycles_min, - al->cycles_max); + al->cycles->avg, al->cycles->min, + al->cycles->max); obj__printf(obj, "%*s ", ANNOTATION__MINMAX_CYCLES_WIDTH - 1, @@ -3172,7 +3272,7 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati if (!*al->line) obj__printf(obj, "%-*s", width - pcnt_width - cycles_width, " "); else if (al->offset == -1) { - if (al->line_nr && notes->options->show_linenr) + if (al->line_nr && annotate_opts.show_linenr) printed = scnprintf(bf, sizeof(bf), "%-*d ", notes->widths.addr + 1, al->line_nr); else printed = scnprintf(bf, sizeof(bf), "%-*s ", notes->widths.addr, " "); @@ -3182,15 +3282,15 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati u64 addr = al->offset; int color = -1; - if (!notes->options->use_offset) + if (!annotate_opts.use_offset) addr += notes->start; - if (!notes->options->use_offset) { + if (!annotate_opts.use_offset) { printed = scnprintf(bf, sizeof(bf), "%" 
PRIx64 ": ", addr); } else { if (al->jump_sources && - notes->options->offset_level >= ANNOTATION__OFFSET_JUMP_TARGETS) { - if (notes->options->show_nr_jumps) { + annotate_opts.offset_level >= ANNOTATION__OFFSET_JUMP_TARGETS) { + if (annotate_opts.show_nr_jumps) { int prev; printed = scnprintf(bf, sizeof(bf), "%*d ", notes->widths.jumps, @@ -3204,9 +3304,9 @@ print_addr: printed = scnprintf(bf, sizeof(bf), "%*" PRIx64 ": ", notes->widths.target, addr); } else if (ins__is_call(&disasm_line(al)->ins) && - notes->options->offset_level >= ANNOTATION__OFFSET_CALL) { + annotate_opts.offset_level >= ANNOTATION__OFFSET_CALL) { goto print_addr; - } else if (notes->options->offset_level == ANNOTATION__MAX_OFFSET_LEVEL) { + } else if (annotate_opts.offset_level == ANNOTATION__MAX_OFFSET_LEVEL) { goto print_addr; } else { printed = scnprintf(bf, sizeof(bf), "%-*s ", @@ -3228,43 +3328,44 @@ print_addr: } void annotation_line__write(struct annotation_line *al, struct annotation *notes, - struct annotation_write_ops *wops, - struct annotation_options *opts) + struct annotation_write_ops *wops) { __annotation_line__write(al, notes, wops->first_line, wops->current_entry, wops->change_color, wops->width, wops->obj, - opts->percent_type, + annotate_opts.percent_type, wops->set_color, wops->set_percent_color, wops->set_jumps_percent_color, wops->printf, wops->write_graph); } int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *options, struct arch **parch) + struct arch **parch) { struct symbol *sym = ms->sym; struct annotation *notes = symbol__annotation(sym); size_t size = symbol__size(sym); int nr_pcnt = 1, err; - notes->offsets = zalloc(size * sizeof(struct annotation_line *)); - if (notes->offsets == NULL) + notes->src->offsets = zalloc(size * sizeof(struct annotation_line *)); + if (notes->src->offsets == NULL) return ENOMEM; if (evsel__is_group_event(evsel)) nr_pcnt = evsel->core.nr_members; - err = symbol__annotate(ms, evsel, options, 
parch); + err = symbol__annotate(ms, evsel, parch); if (err) goto out_free_offsets; - notes->options = options; - symbol__calc_percent(sym, evsel); annotation__set_offsets(notes, size); annotation__mark_jump_targets(notes, sym); - annotation__compute_ipc(notes, size); + + err = annotation__compute_ipc(notes, size); + if (err) + goto out_free_offsets; + annotation__init_column_widths(notes, sym); notes->nr_events = nr_pcnt; @@ -3274,7 +3375,7 @@ int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel, return 0; out_free_offsets: - zfree(&notes->offsets); + zfree(&notes->src->offsets); return err; } @@ -3337,8 +3438,10 @@ static int annotation__config(const char *var, const char *value, void *data) return 0; } -void annotation_options__init(struct annotation_options *opt) +void annotation_options__init(void) { + struct annotation_options *opt = &annotate_opts; + memset(opt, 0, sizeof(*opt)); /* Default values. */ @@ -3349,16 +3452,15 @@ void annotation_options__init(struct annotation_options *opt) opt->percent_type = PERCENT_PERIOD_LOCAL; } - -void annotation_options__exit(struct annotation_options *opt) +void annotation_options__exit(void) { - zfree(&opt->disassembler_style); - zfree(&opt->objdump_path); + zfree(&annotate_opts.disassembler_style); + zfree(&annotate_opts.objdump_path); } -void annotation_config__init(struct annotation_options *opt) +void annotation_config__init(void) { - perf_config(annotation__config, opt); + perf_config(annotation__config, &annotate_opts); } static unsigned int parse_percent_type(char *str1, char *str2) @@ -3382,10 +3484,9 @@ static unsigned int parse_percent_type(char *str1, char *str2) return type; } -int annotate_parse_percent_type(const struct option *opt, const char *_str, +int annotate_parse_percent_type(const struct option *opt __maybe_unused, const char *_str, int unset __maybe_unused) { - struct annotation_options *opts = opt->value; unsigned int type; char *str1, *str2; int err = -1; @@ -3404,7 +3505,7 @@ int
annotate_parse_percent_type(const struct option *opt, const char *_str, if (type == (unsigned int) -1) type = parse_percent_type(str2, str1); if (type != (unsigned int) -1) { - opts->percent_type = type; + annotate_opts.percent_type = type; err = 0; } @@ -3413,11 +3514,267 @@ out: return err; } -int annotate_check_args(struct annotation_options *args) +int annotate_check_args(void) { + struct annotation_options *args = &annotate_opts; + if (args->prefix_strip && !args->prefix) { pr_err("--prefix-strip requires --prefix\n"); return -1; } return 0; } + +/* + * Get register number and access offset from the given instruction. + * It assumes AT&T x86 asm format like OFFSET(REG). Maybe it needs + * to revisit the format when it handles different architecture. + * Fills @reg and @offset when return 0. + */ +static int extract_reg_offset(struct arch *arch, const char *str, + struct annotated_op_loc *op_loc) +{ + char *p; + char *regname; + + if (arch->objdump.register_char == 0) + return -1; + + /* + * It should start from offset, but it's possible to skip 0 + * in the asm. So 0(%rax) should be same as (%rax). + * + * However, it also start with a segment select register like + * %gs:0x18(%rbx). In that case it should skip the part. + */ + if (*str == arch->objdump.register_char) { + while (*str && !isdigit(*str) && + *str != arch->objdump.memory_ref_char) + str++; + } + + op_loc->offset = strtol(str, &p, 0); + + p = strchr(p, arch->objdump.register_char); + if (p == NULL) + return -1; + + regname = strdup(p); + if (regname == NULL) + return -1; + + op_loc->reg = get_dwarf_regnum(regname, 0); + free(regname); + return 0; +} + +/** + * annotate_get_insn_location - Get location of instruction + * @arch: the architecture info + * @dl: the target instruction + * @loc: a buffer to save the data + * + * Get detailed location info (register and offset) in the instruction. + * It needs both source and target operand and whether it accesses a + * memory location. 
The offset field is meaningful only when the + * corresponding mem flag is set. + * + * Some examples on x86: + * + * mov (%rax), %rcx # src_reg = rax, src_mem = 1, src_offset = 0 + * # dst_reg = rcx, dst_mem = 0 + * + * mov 0x18, %r8 # src_reg = -1, dst_reg = r8 + */ +int annotate_get_insn_location(struct arch *arch, struct disasm_line *dl, + struct annotated_insn_loc *loc) +{ + struct ins_operands *ops; + struct annotated_op_loc *op_loc; + int i; + + if (!strcmp(dl->ins.name, "lock")) + ops = dl->ops.locked.ops; + else + ops = &dl->ops; + + if (ops == NULL) + return -1; + + memset(loc, 0, sizeof(*loc)); + + for_each_insn_op_loc(loc, i, op_loc) { + const char *insn_str = ops->source.raw; + + if (i == INSN_OP_TARGET) + insn_str = ops->target.raw; + + /* Invalidate the register by default */ + op_loc->reg = -1; + + if (insn_str == NULL) + continue; + + if (strchr(insn_str, arch->objdump.memory_ref_char)) { + op_loc->mem_ref = true; + extract_reg_offset(arch, insn_str, op_loc); + } else { + char *s = strdup(insn_str); + + if (s) { + op_loc->reg = get_dwarf_regnum(s, 0); + free(s); + } + } + } + + return 0; +} + +static void symbol__ensure_annotate(struct map_symbol *ms, struct evsel *evsel) +{ + struct disasm_line *dl, *tmp_dl; + struct annotation *notes; + + notes = symbol__annotation(ms->sym); + if (!list_empty(&notes->src->source)) + return; + + if (symbol__annotate(ms, evsel, NULL) < 0) + return; + + /* remove non-insn disasm lines for simplicity */ + list_for_each_entry_safe(dl, tmp_dl, &notes->src->source, al.node) { + if (dl->al.offset == -1) { + list_del(&dl->al.node); + free(dl); + } + } +} + +static struct disasm_line *find_disasm_line(struct symbol *sym, u64 ip) +{ + struct disasm_line *dl; + struct annotation *notes; + + notes = symbol__annotation(sym); + + list_for_each_entry(dl, &notes->src->source, al.node) { + if (sym->start + dl->al.offset == ip) + return dl; + } + return NULL; +} + +static struct annotated_item_stat *annotate_data_stat(struct list_head
*head, + const char *name) +{ + struct annotated_item_stat *istat; + + list_for_each_entry(istat, head, list) { + if (!strcmp(istat->name, name)) + return istat; + } + + istat = zalloc(sizeof(*istat)); + if (istat == NULL) + return NULL; + + istat->name = strdup(name); + if (istat->name == NULL) { + free(istat); + return NULL; + } + + list_add_tail(&istat->list, head); + return istat; +} + +/** + * hist_entry__get_data_type - find data type for given hist entry + * @he: hist entry + * + * This function first annotates the instruction at @he->ip and extracts + * register and offset info from it. Then it searches the DWARF debug + * info to get a variable and type information using the address, register, + * and offset. + */ +struct annotated_data_type *hist_entry__get_data_type(struct hist_entry *he) +{ + struct map_symbol *ms = &he->ms; + struct evsel *evsel = hists_to_evsel(he->hists); + struct arch *arch; + struct disasm_line *dl; + struct annotated_insn_loc loc; + struct annotated_op_loc *op_loc; + struct annotated_data_type *mem_type; + struct annotated_item_stat *istat; + u64 ip = he->ip; + int i; + + ann_data_stat.total++; + + if (ms->map == NULL || ms->sym == NULL) { + ann_data_stat.no_sym++; + return NULL; + } + + if (!symbol_conf.init_annotation) { + ann_data_stat.no_sym++; + return NULL; + } + + if (evsel__get_arch(evsel, &arch) < 0) { + ann_data_stat.no_insn++; + return NULL; + } + + /* Make sure it runs objdump to get disasm of the function */ + symbol__ensure_annotate(ms, evsel); + + /* + * Get a disasm to extract the location from the insn. + * This is too slow... 
+ */ + dl = find_disasm_line(ms->sym, ip); + if (dl == NULL) { + ann_data_stat.no_insn++; + return NULL; + } + + istat = annotate_data_stat(&ann_insn_stat, dl->ins.name); + if (istat == NULL) { + ann_data_stat.no_insn++; + return NULL; + } + + if (annotate_get_insn_location(arch, dl, &loc) < 0) { + ann_data_stat.no_insn_ops++; + istat->bad++; + return NULL; + } + + for_each_insn_op_loc(&loc, i, op_loc) { + if (!op_loc->mem_ref) + continue; + + mem_type = find_data_type(ms, ip, op_loc->reg, op_loc->offset); + if (mem_type) + istat->good++; + else + istat->bad++; + + if (symbol_conf.annotate_data_sample) { + annotated_data_type__update_samples(mem_type, evsel, + op_loc->offset, + he->stat.nr_events, + he->stat.period); + } + he->mem_type_off = op_loc->offset; + return mem_type; + } + + ann_data_stat.no_mem_ops++; + istat->bad++; + return NULL; +} diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 96278055917601c31ee928099188ddd35d5c1e0c..dba50762c6e807198880909a7a058e78bc7f9e21 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -23,6 +23,7 @@ struct option; struct perf_sample; struct evsel; struct symbol; +struct annotated_data_type; struct ins { const char *name; @@ -31,8 +32,6 @@ struct ins { struct ins_operands { char *raw; - char *raw_comment; - char *raw_func_start; struct { char *raw; char *name; @@ -41,22 +40,30 @@ struct ins_operands { s64 offset; bool offset_avail; bool outside; + bool multi_regs; } target; union { struct { char *raw; char *name; u64 addr; + bool multi_regs; } source; struct { struct ins ins; struct ins_operands *ops; } locked; + struct { + char *raw_comment; + char *raw_func_start; + } jump; }; }; struct arch; +bool arch__is(struct arch *arch, const char *name); + struct ins_ops { void (*free)(struct ins_operands *ops); int (*parse)(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms); @@ -101,6 +108,8 @@ struct annotation_options { unsigned int percent_type; }; +extern struct 
annotation_options annotate_opts; + enum { ANNOTATION__OFFSET_JUMP_TARGETS = 1, ANNOTATION__OFFSET_CALL, @@ -130,6 +139,13 @@ struct annotation_data { struct sym_hist_entry he; }; +struct cycles_info { + float ipc; + u64 avg; + u64 max; + u64 min; +}; + struct annotation_line { struct list_head node; struct rb_node rb_node; @@ -137,12 +153,9 @@ struct annotation_line { char *line; int line_nr; char *fileloc; - int jump_sources; - float ipc; - u64 cycles; - u64 cycles_max; - u64 cycles_min; char *path; + struct cycles_info *cycles; + int jump_sources; u32 idx; int idx_asm; int data_nr; @@ -214,8 +227,7 @@ struct annotation_write_ops { }; void annotation_line__write(struct annotation_line *al, struct annotation *notes, - struct annotation_write_ops *ops, - struct annotation_options *opts); + struct annotation_write_ops *ops); int __annotation__scnprintf_samples_period(struct annotation *notes, char *bf, size_t size, @@ -264,27 +276,29 @@ struct cyc_hist { * returns. */ struct annotated_source { - struct list_head source; - int nr_histograms; - size_t sizeof_sym_hist; - struct cyc_hist *cycles_hist; - struct sym_hist *histograms; + struct list_head source; + size_t sizeof_sym_hist; + struct sym_hist *histograms; + struct annotation_line **offsets; + int nr_histograms; + int nr_entries; + int nr_asm_entries; + u16 max_line_len; }; -struct LOCKABLE annotation { - u64 max_coverage; - u64 start; +struct annotated_branch { u64 hit_cycles; u64 hit_insn; unsigned int total_insn; unsigned int cover_insn; - struct annotation_options *options; - struct annotation_line **offsets; + struct cyc_hist *cycles_hist; + u64 max_coverage; +}; + +struct LOCKABLE annotation { + u64 start; int nr_events; int max_jump_sources; - int nr_entries; - int nr_asm_entries; - u16 max_line_len; struct { u8 addr; u8 jumps; @@ -293,8 +307,8 @@ struct LOCKABLE annotation { u8 max_addr; u8 max_ins_name; } widths; - bool have_cycles; struct annotated_source *src; + struct annotated_branch *branch; }; 
static inline void annotation__init(struct annotation *notes __maybe_unused) @@ -308,10 +322,10 @@ bool annotation__trylock(struct annotation *notes) EXCLUSIVE_TRYLOCK_FUNCTION(tr static inline int annotation__cycles_width(struct annotation *notes) { - if (notes->have_cycles && notes->options->show_minmax_cycle) + if (notes->branch && annotate_opts.show_minmax_cycle) return ANNOTATION__IPC_WIDTH + ANNOTATION__MINMAX_CYCLES_WIDTH; - return notes->have_cycles ? ANNOTATION__IPC_WIDTH + ANNOTATION__CYCLES_WIDTH : 0; + return notes->branch ? ANNOTATION__IPC_WIDTH + ANNOTATION__CYCLES_WIDTH : 0; } static inline int annotation__pcnt_width(struct annotation *notes) @@ -319,13 +333,12 @@ static inline int annotation__pcnt_width(struct annotation *notes) return (symbol_conf.show_total_period ? 12 : 7) * notes->nr_events; } -static inline bool annotation_line__filter(struct annotation_line *al, struct annotation *notes) +static inline bool annotation_line__filter(struct annotation_line *al) { - return notes->options->hide_src_code && al->offset == -1; + return annotate_opts.hide_src_code && al->offset == -1; } void annotation__set_offsets(struct annotation *notes, s64 size); -void annotation__compute_ipc(struct annotation *notes, size_t size); void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym); void annotation__update_column_widths(struct annotation *notes); void annotation__init_column_widths(struct annotation *notes, struct symbol *sym); @@ -349,6 +362,8 @@ static inline struct annotation *symbol__annotation(struct symbol *sym) int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample, struct evsel *evsel); +struct annotated_branch *annotation__get_branch(struct annotation *notes); + int addr_map_symbol__account_cycles(struct addr_map_symbol *ams, struct addr_map_symbol *start, unsigned cycles); @@ -361,11 +376,9 @@ void symbol__annotate_zero_histograms(struct symbol *sym); int symbol__annotate(struct map_symbol 
*ms, struct evsel *evsel, - struct annotation_options *options, struct arch **parch); int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *options, struct arch **parch); enum symbol_disassemble_errno { @@ -392,43 +405,86 @@ enum symbol_disassemble_errno { int symbol__strerror_disassemble(struct map_symbol *ms, int errnum, char *buf, size_t buflen); -int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *options); +int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel); void symbol__annotate_zero_histogram(struct symbol *sym, int evidx); void symbol__annotate_decay_histogram(struct symbol *sym, int evidx); void annotated_source__purge(struct annotated_source *as); -int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *opts); +int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel); bool ui__has_annotation(void); -int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel, struct annotation_options *opts); +int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel); -int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel, struct annotation_options *opts); +int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel); #ifdef HAVE_SLANG_SUPPORT int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, - struct hist_browser_timer *hbt, - struct annotation_options *opts); + struct hist_browser_timer *hbt); #else static inline int symbol__tui_annotate(struct map_symbol *ms __maybe_unused, struct evsel *evsel __maybe_unused, - struct hist_browser_timer *hbt __maybe_unused, - struct annotation_options *opts __maybe_unused) + struct hist_browser_timer *hbt __maybe_unused) { return 0; } #endif -void annotation_options__init(struct annotation_options *opt); -void annotation_options__exit(struct annotation_options *opt); +void 
annotation_options__init(void); +void annotation_options__exit(void); -void annotation_config__init(struct annotation_options *opt); +void annotation_config__init(void); int annotate_parse_percent_type(const struct option *opt, const char *_str, int unset); -int annotate_check_args(struct annotation_options *args); +int annotate_check_args(void); + +/** + * struct annotated_op_loc - Location info of instruction operand + * @reg: Register in the operand + * @offset: Memory access offset in the operand + * @mem_ref: Whether the operand accesses memory + */ +struct annotated_op_loc { + int reg; + int offset; + bool mem_ref; +}; + +enum annotated_insn_ops { + INSN_OP_SOURCE = 0, + INSN_OP_TARGET = 1, + + INSN_OP_MAX, +}; + +/** + * struct annotated_insn_loc - Location info of instruction + * @ops: Array of location info for source and target operands + */ +struct annotated_insn_loc { + struct annotated_op_loc ops[INSN_OP_MAX]; +}; + +#define for_each_insn_op_loc(insn_loc, i, op_loc) \ + for (i = INSN_OP_SOURCE, op_loc = &(insn_loc)->ops[i]; \ + i < INSN_OP_MAX; \ + i++, op_loc++) + +/* Get detailed location info in the instruction */ +int annotate_get_insn_location(struct arch *arch, struct disasm_line *dl, + struct annotated_insn_loc *loc); + +/* Returns a data type from the sample instruction (if any) */ +struct annotated_data_type *hist_entry__get_data_type(struct hist_entry *he); + +struct annotated_item_stat { + struct list_head list; + char *name; + int good; + int bad; +}; +extern struct list_head ann_insn_stat; #endif /* __PERF_ANNOTATE_H */ diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index a0368202a746ab6c046eed1a3b8bfb671af71456..3684e6009b635076c8171d68b4b9edb89bfcf1f6 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -174,7 +174,7 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, struct evlist *evlist, struct evsel *evsel, int idx) { - bool per_cpu = 
!perf_cpu_map__empty(evlist->core.user_requested_cpus); + bool per_cpu = !perf_cpu_map__has_any_cpu_or_is_empty(evlist->core.user_requested_cpus); mp->mmap_needed = evsel->needs_auxtrace_mmap; @@ -648,7 +648,7 @@ int auxtrace_parse_snapshot_options(struct auxtrace_record *itr, static int evlist__enable_event_idx(struct evlist *evlist, struct evsel *evsel, int idx) { - bool per_cpu_mmaps = !perf_cpu_map__empty(evlist->core.user_requested_cpus); + bool per_cpu_mmaps = !perf_cpu_map__has_any_cpu_or_is_empty(evlist->core.user_requested_cpus); if (per_cpu_mmaps) { struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->core.all_cpus, idx); @@ -1638,6 +1638,9 @@ int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts, case 'Z': synth_opts->timeless_decoding = true; break; + case 'T': + synth_opts->use_timestamp = true; + break; case ' ': case ',': break; diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 29eb82dff5749c44afa6200dfe3894e64be5258b..55702215a82d31c1a519dde9df327d276adc5c2f 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -99,6 +99,7 @@ enum itrace_period_type { * @remote_access: whether to synthesize remote access events * @mem: whether to synthesize memory events * @timeless_decoding: prefer "timeless" decoding i.e. 
ignore timestamps + * @use_timestamp: use the timestamp trace as kernel time * @vm_time_correlation: perform VM Time Correlation * @vm_tm_corr_dry_run: VM Time Correlation dry-run * @vm_tm_corr_args: VM Time Correlation implementation-specific arguments @@ -146,6 +147,7 @@ struct itrace_synth_opts { bool remote_access; bool mem; bool timeless_decoding; + bool use_timestamp; bool vm_time_correlation; bool vm_tm_corr_dry_run; char *vm_tm_corr_args; @@ -678,6 +680,7 @@ bool auxtrace__evsel_is_auxtrace(struct perf_session *session, " q: quicker (less detailed) decoding\n" \ " A: approximate IPC\n" \ " Z: prefer to ignore timestamps (so-called \"timeless\" decoding)\n" \ +" T: use the timestamp trace as kernel time\n" \ " PERIOD[ns|us|ms|i|t]: specify period to sample stream\n" \ " concatenate multiple options. Default is iybxwpe or cewp\n" diff --git a/tools/perf/util/block-info.c b/tools/perf/util/block-info.c index 591fc1edd385caee7be9ff7834b18b255994747c..dec910989701eb9433442c00383e01673e11dd86 100644 --- a/tools/perf/util/block-info.c +++ b/tools/perf/util/block-info.c @@ -129,9 +129,9 @@ int block_info__process_sym(struct hist_entry *he, struct block_hist *bh, al.sym = he->ms.sym; notes = symbol__annotation(he->ms.sym); - if (!notes || !notes->src || !notes->src->cycles_hist) + if (!notes || !notes->branch || !notes->branch->cycles_hist) return 0; - ch = notes->src->cycles_hist; + ch = notes->branch->cycles_hist; for (unsigned int i = 0; i < symbol__size(he->ms.sym); i++) { if (ch[i].num_aggr) { struct block_info *bi; @@ -464,8 +464,7 @@ void block_info__free_report(struct block_report *reps, int nr_reps) } int report__browse_block_hists(struct block_hist *bh, float min_percent, - struct evsel *evsel, struct perf_env *env, - struct annotation_options *annotation_opts) + struct evsel *evsel, struct perf_env *env) { int ret; @@ -477,8 +476,7 @@ int report__browse_block_hists(struct block_hist *bh, float min_percent, return 0; case 1: 
symbol_conf.report_individual_block = true; - ret = block_hists_tui_browse(bh, evsel, min_percent, - env, annotation_opts); + ret = block_hists_tui_browse(bh, evsel, min_percent, env); return ret; default: return -1; diff --git a/tools/perf/util/block-info.h b/tools/perf/util/block-info.h index 42e9dcc4cf0ab3584253046d1557036f3f7395e3..96f53e89795e24a95293a5170a168279df5760bc 100644 --- a/tools/perf/util/block-info.h +++ b/tools/perf/util/block-info.h @@ -78,8 +78,7 @@ struct block_report *block_info__create_report(struct evlist *evlist, void block_info__free_report(struct block_report *reps, int nr_reps); int report__browse_block_hists(struct block_hist *bh, float min_percent, - struct evsel *evsel, struct perf_env *env, - struct annotation_options *annotation_opts); + struct evsel *evsel, struct perf_env *env); float block_info__total_cycles_percent(struct hist_entry *he); diff --git a/tools/perf/util/block-range.c b/tools/perf/util/block-range.c index 680e92774d0cde6171fddcc46a992af02766615a..15c42196c24c8230779b04c1ddd55e1d473deade 100644 --- a/tools/perf/util/block-range.c +++ b/tools/perf/util/block-range.c @@ -311,6 +311,7 @@ done: double block_range__coverage(struct block_range *br) { struct symbol *sym; + struct annotated_branch *branch; if (!br) { if (block_ranges.blocks) @@ -323,5 +324,9 @@ double block_range__coverage(struct block_range *br) if (!sym) return -1; - return (double)br->coverage / symbol__annotation(sym)->max_coverage; + branch = symbol__annotation(sym)->branch; + if (!branch) + return -1; + + return (double)br->coverage / branch->max_coverage; } diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 38fcf3ba5749d9f77aab72af20aae9e949e9eb0d..3573e0b7ef3eda83ba635868f303bfd30df7153e 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -386,6 +386,9 @@ int perf_event__synthesize_bpf_events(struct perf_session *session, int err; int fd; + if (opts->no_bpf_event) + return 0; + event = 
malloc(sizeof(event->bpf) + KSYM_NAME_LEN + machine->id_hdr_size); if (!event) return -1; @@ -542,9 +545,9 @@ int evlist__add_bpf_sb_event(struct evlist *evlist, struct perf_env *env) return evlist__add_sb_event(evlist, &attr, bpf_event__sb_cb, env); } -void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, - struct perf_env *env, - FILE *fp) +void __bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, + struct perf_env *env, + FILE *fp) { __u32 *prog_lens = (__u32 *)(uintptr_t)(info->jited_func_lens); __u64 *prog_addrs = (__u64 *)(uintptr_t)(info->jited_ksyms); @@ -560,7 +563,7 @@ void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, if (info->btf_id) { struct btf_node *node; - node = perf_env__find_btf(env, info->btf_id); + node = __perf_env__find_btf(env, info->btf_id); if (node) btf = btf__new((__u8 *)(node->data), node->data_size); diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h index 1bcbd4fb6c669d76065255ad196addaa37cf85aa..e2f0420905f597410dc027f4d6e3e0a5e8ccc48c 100644 --- a/tools/perf/util/bpf-event.h +++ b/tools/perf/util/bpf-event.h @@ -33,9 +33,9 @@ struct btf_node { int machine__process_bpf(struct machine *machine, union perf_event *event, struct perf_sample *sample); int evlist__add_bpf_sb_event(struct evlist *evlist, struct perf_env *env); -void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, - struct perf_env *env, - FILE *fp); +void __bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, + struct perf_env *env, + FILE *fp); #else static inline int machine__process_bpf(struct machine *machine __maybe_unused, union perf_event *event __maybe_unused, @@ -50,9 +50,9 @@ static inline int evlist__add_bpf_sb_event(struct evlist *evlist __maybe_unused, return 0; } -static inline void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info __maybe_unused, - struct perf_env *env __maybe_unused, - FILE *fp __maybe_unused) +static inline void __bpf_event__print_bpf_prog_info(struct bpf_prog_info 
*info __maybe_unused, + struct perf_env *env __maybe_unused, + FILE *fp __maybe_unused) { } diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c index 7f9b0e46e008c466604aec8c4e59ace15001a5e8..7a8af60e0f5158fe7936898a7c12fceadef7e8f0 100644 --- a/tools/perf/util/bpf_counter.c +++ b/tools/perf/util/bpf_counter.c @@ -455,7 +455,7 @@ static int bperf__load(struct evsel *evsel, struct target *target) return -1; if (!all_cpu_map) { - all_cpu_map = perf_cpu_map__new(NULL); + all_cpu_map = perf_cpu_map__new_online_cpus(); if (!all_cpu_map) return -1; } diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c index f1716c089c9912f4f9bfca827bde4e509db8d22e..31ff19afc20c1b857a4397185926007dacb75e71 100644 --- a/tools/perf/util/bpf_lock_contention.c +++ b/tools/perf/util/bpf_lock_contention.c @@ -318,7 +318,7 @@ int lock_contention_read(struct lock_contention *con) } /* make sure it loads the kernel map */ - map__load(maps__first(machine->kmaps)->map); + maps__load_first(machine->kmaps); prev_key = NULL; while (!bpf_map_get_next_key(fd, prev_key, &key)) { diff --git a/tools/perf/util/compress.h b/tools/perf/util/compress.h index 0cd3369af2a4f2cdfb184f25e22a3beb53c87040..b29109cd36095c4fe4063cff1d60cf50b420d66d 100644 --- a/tools/perf/util/compress.h +++ b/tools/perf/util/compress.h @@ -3,6 +3,8 @@ #define PERF_COMPRESS_H #include +#include +#include #ifdef HAVE_ZSTD_SUPPORT #include #endif @@ -21,6 +23,7 @@ struct zstd_data { #ifdef HAVE_ZSTD_SUPPORT ZSTD_CStream *cstream; ZSTD_DStream *dstream; + int comp_level; #endif }; @@ -29,7 +32,7 @@ struct zstd_data { int zstd_init(struct zstd_data *data, int level); int zstd_fini(struct zstd_data *data); -size_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t dst_size, +ssize_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t dst_size, void *src, size_t src_size, size_t max_record_size, size_t process_header(void *record, size_t 
increment)); @@ -48,7 +51,7 @@ static inline int zstd_fini(struct zstd_data *data __maybe_unused) } static inline -size_t zstd_compress_stream_to_records(struct zstd_data *data __maybe_unused, +ssize_t zstd_compress_stream_to_records(struct zstd_data *data __maybe_unused, void *dst __maybe_unused, size_t dst_size __maybe_unused, void *src __maybe_unused, size_t src_size __maybe_unused, size_t max_record_size __maybe_unused, diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 0e090e8bc33491eea0f2007f136aa236cc103482..0581ee0fa5f270b4eb6fa4ae5776afb056804099 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -672,7 +672,7 @@ struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */ static struct perf_cpu_map *online; if (!online) - online = perf_cpu_map__new(NULL); /* from /sys/devices/system/cpu/online */ + online = perf_cpu_map__new_online_cpus(); /* from /sys/devices/system/cpu/online */ return online; } diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c index 81cfc85f46682ce0c0a2cc5a88556dbb88beb23b..8bbeb2dc76fda994b7f83abd227aceaed6e78c55 100644 --- a/tools/perf/util/cputopo.c +++ b/tools/perf/util/cputopo.c @@ -267,7 +267,7 @@ struct cpu_topology *cpu_topology__new(void) ncpus = cpu__max_present_cpu().cpu; /* build online CPU map */ - map = perf_cpu_map__new(NULL); + map = perf_cpu_map__new_online_cpus(); if (map == NULL) { pr_debug("failed to get system cpumap\n"); return NULL; diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index a9873d14c6329925968770cf571724bf25a84df8..d65d7485886cd512fca26134f6f34921b13753a1 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -3346,12 +3346,27 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event, etm->metadata = metadata; etm->auxtrace_type = auxtrace_info->type; - /* Use virtual timestamps if all ETMs report ts_source = 1 */ - etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu); + if 
(etm->synth_opts.use_timestamp) + /* + * Prior to Armv8.4, Arm CPUs don't support the FEAT_TRF feature, + * therefore the decoder cannot know if the timestamp trace is + * the same as the kernel time. + * + * A user who has knowledge of the working platform can + * specify itrace option 'T' to tell the decoder to forcibly + * use the traced timestamp as the kernel time. + */ + etm->has_virtual_ts = true; + else + /* Use virtual timestamps if all ETMs report ts_source = 1 */ + etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu); if (!etm->has_virtual_ts) ui__warning("Virtual timestamps are not enabled, or not supported by the traced system.\n" - "The time field of the samples will not be set accurately.\n\n"); + "The time field of the samples will not be set accurately.\n" + "For Arm CPUs prior to Armv8.4 or without support FEAT_TRF,\n" + "you can specify the itrace option 'T' for timestamp decoding\n" + "if the Coresight timestamp on the platform is same with the kernel time.\n\n"); etm->auxtrace.process_event = cs_etm__process_event; etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event; diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c index b9fb71ab7a7303a301465ec2cfa82e6efbc02538..106429155c2e9d131f65bd425e375f76e4e8de7a 100644 --- a/tools/perf/util/db-export.c +++ b/tools/perf/util/db-export.c @@ -253,8 +253,8 @@ static struct call_path *call_path_from_sample(struct db_export *dbe, */ addr_location__init(&al); al.sym = node->ms.sym; - al.map = node->ms.map; - al.maps = thread__maps(thread); + al.map = map__get(node->ms.map); + al.maps = maps__get(thread__maps(thread)); al.addr = node->ip; if (al.map && !al.sym) diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 88378c4c5dd9e6d8739c3c49e836782464b5a0bf..e282b4ceb4d25fd560a972acb50689010bd01a85 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -38,12 +38,21 @@ bool dump_trace = false, quiet = false; int debug_ordered_events; static
int redirect_to_stderr; int debug_data_convert; -static FILE *debug_file; +static FILE *_debug_file; bool debug_display_time; +FILE *debug_file(void) +{ + if (!_debug_file) { + pr_warning_once("debug_file not set"); + debug_set_file(stderr); + } + return _debug_file; +} + void debug_set_file(FILE *file) { - debug_file = file; + _debug_file = file; } void debug_set_display_time(bool set) @@ -78,8 +87,8 @@ int veprintf(int level, int var, const char *fmt, va_list args) if (use_browser >= 1 && !redirect_to_stderr) { ui_helpline__vshow(fmt, args); } else { - ret = fprintf_time(debug_file); - ret += vfprintf(debug_file, fmt, args); + ret = fprintf_time(debug_file()); + ret += vfprintf(debug_file(), fmt, args); } } @@ -107,9 +116,8 @@ static int veprintf_time(u64 t, const char *fmt, va_list args) nsecs -= secs * NSEC_PER_SEC; usecs = nsecs / NSEC_PER_USEC; - ret = fprintf(stderr, "[%13" PRIu64 ".%06" PRIu64 "] ", - secs, usecs); - ret += vfprintf(stderr, fmt, args); + ret = fprintf(debug_file(), "[%13" PRIu64 ".%06" PRIu64 "] ", secs, usecs); + ret += vfprintf(debug_file(), fmt, args); return ret; } diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index f99468a7f68170017f0fa9adc1862704bdbf3a71..de8870980d44abc3f4a52add52affbdaefe11448 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -77,6 +77,7 @@ int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __printf(4, 5) int veprintf(int level, int var, const char *fmt, va_list args); int perf_debug_option(const char *str); +FILE *debug_file(void); void debug_set_file(FILE *file); void debug_set_display_time(bool set); void perf_debug_setup(void); diff --git a/tools/perf/util/debuginfo.c b/tools/perf/util/debuginfo.c new file mode 100644 index 0000000000000000000000000000000000000000..19acf4775d3587a44d6fd05ed43f5603d3b1b735 --- /dev/null +++ b/tools/perf/util/debuginfo.c @@ -0,0 +1,205 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * DWARF debug information handling code. 
Copied from probe-finder.c. + * + * Written by Masami Hiramatsu + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "build-id.h" +#include "dso.h" +#include "debug.h" +#include "debuginfo.h" +#include "symbol.h" + +#ifdef HAVE_DEBUGINFOD_SUPPORT +#include +#endif + +/* Dwarf FL wrappers */ +static char *debuginfo_path; /* Currently dummy */ + +static const Dwfl_Callbacks offline_callbacks = { + .find_debuginfo = dwfl_standard_find_debuginfo, + .debuginfo_path = &debuginfo_path, + + .section_address = dwfl_offline_section_address, + + /* We use this table for core files too. */ + .find_elf = dwfl_build_id_find_elf, +}; + +/* Get a Dwarf from offline image */ +static int debuginfo__init_offline_dwarf(struct debuginfo *dbg, + const char *path) +{ + GElf_Addr dummy; + int fd; + + fd = open(path, O_RDONLY); + if (fd < 0) + return fd; + + dbg->dwfl = dwfl_begin(&offline_callbacks); + if (!dbg->dwfl) + goto error; + + dwfl_report_begin(dbg->dwfl); + dbg->mod = dwfl_report_offline(dbg->dwfl, "", "", fd); + if (!dbg->mod) + goto error; + + dbg->dbg = dwfl_module_getdwarf(dbg->mod, &dbg->bias); + if (!dbg->dbg) + goto error; + + dwfl_module_build_id(dbg->mod, &dbg->build_id, &dummy); + + dwfl_report_end(dbg->dwfl, NULL, NULL); + + return 0; +error: + if (dbg->dwfl) + dwfl_end(dbg->dwfl); + else + close(fd); + memset(dbg, 0, sizeof(*dbg)); + + return -ENOENT; +} + +static struct debuginfo *__debuginfo__new(const char *path) +{ + struct debuginfo *dbg = zalloc(sizeof(*dbg)); + if (!dbg) + return NULL; + + if (debuginfo__init_offline_dwarf(dbg, path) < 0) + zfree(&dbg); + if (dbg) + pr_debug("Open Debuginfo file: %s\n", path); + return dbg; +} + +enum dso_binary_type distro_dwarf_types[] = { + DSO_BINARY_TYPE__FEDORA_DEBUGINFO, + DSO_BINARY_TYPE__UBUNTU_DEBUGINFO, + DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO, + DSO_BINARY_TYPE__BUILDID_DEBUGINFO, + DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO, + DSO_BINARY_TYPE__NOT_FOUND, +}; + +struct debuginfo 
*debuginfo__new(const char *path) +{ + enum dso_binary_type *type; + char buf[PATH_MAX], nil = '\0'; + struct dso *dso; + struct debuginfo *dinfo = NULL; + struct build_id bid; + + /* Try to open distro debuginfo files */ + dso = dso__new(path); + if (!dso) + goto out; + + /* Set the build id for DSO_BINARY_TYPE__BUILDID_DEBUGINFO */ + if (is_regular_file(path) && filename__read_build_id(path, &bid) > 0) + dso__set_build_id(dso, &bid); + + for (type = distro_dwarf_types; + !dinfo && *type != DSO_BINARY_TYPE__NOT_FOUND; + type++) { + if (dso__read_binary_type_filename(dso, *type, &nil, + buf, PATH_MAX) < 0) + continue; + dinfo = __debuginfo__new(buf); + } + dso__put(dso); + +out: + /* if failed to open all distro debuginfo, open given binary */ + return dinfo ? : __debuginfo__new(path); +} + +void debuginfo__delete(struct debuginfo *dbg) +{ + if (dbg) { + if (dbg->dwfl) + dwfl_end(dbg->dwfl); + free(dbg); + } +} + +/* For the kernel module, we need a special code to get a DIE */ +int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs, + bool adjust_offset) +{ + int n, i; + Elf32_Word shndx; + Elf_Scn *scn; + Elf *elf; + GElf_Shdr mem, *shdr; + const char *p; + + elf = dwfl_module_getelf(dbg->mod, &dbg->bias); + if (!elf) + return -EINVAL; + + /* Get the number of relocations */ + n = dwfl_module_relocations(dbg->mod); + if (n < 0) + return -ENOENT; + /* Search the relocation related .text section */ + for (i = 0; i < n; i++) { + p = dwfl_module_relocation_info(dbg->mod, i, &shndx); + if (strcmp(p, ".text") == 0) { + /* OK, get the section header */ + scn = elf_getscn(elf, shndx); + if (!scn) + return -ENOENT; + shdr = gelf_getshdr(scn, &mem); + if (!shdr) + return -ENOENT; + *offs = shdr->sh_addr; + if (adjust_offset) + *offs -= shdr->sh_offset; + } + } + return 0; +} + +#ifdef HAVE_DEBUGINFOD_SUPPORT +int get_source_from_debuginfod(const char *raw_path, + const char *sbuild_id, char **new_path) +{ + debuginfod_client *c = debuginfod_begin(); + const 
char *p = raw_path; + int fd; + + if (!c) + return -ENOMEM; + + fd = debuginfod_find_source(c, (const unsigned char *)sbuild_id, + 0, p, new_path); + pr_debug("Search %s from debuginfod -> %d\n", p, fd); + if (fd >= 0) + close(fd); + debuginfod_end(c); + if (fd < 0) { + pr_debug("Failed to find %s in debuginfod (%s)\n", + raw_path, sbuild_id); + return -ENOENT; + } + pr_debug("Got a source %s\n", *new_path); + + return 0; +} +#endif /* HAVE_DEBUGINFOD_SUPPORT */ diff --git a/tools/perf/util/debuginfo.h b/tools/perf/util/debuginfo.h new file mode 100644 index 0000000000000000000000000000000000000000..4d65b8c605fc5445cecc622d5d663eb1fed7d21b --- /dev/null +++ b/tools/perf/util/debuginfo.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _PERF_DEBUGINFO_H +#define _PERF_DEBUGINFO_H + +#include +#include + +#ifdef HAVE_DWARF_SUPPORT + +#include "dwarf-aux.h" + +/* debug information structure */ +struct debuginfo { + Dwarf *dbg; + Dwfl_Module *mod; + Dwfl *dwfl; + Dwarf_Addr bias; + const unsigned char *build_id; +}; + +/* This also tries to open distro debuginfo */ +struct debuginfo *debuginfo__new(const char *path); +void debuginfo__delete(struct debuginfo *dbg); + +int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs, + bool adjust_offset); + +#else /* HAVE_DWARF_SUPPORT */ + +/* dummy debug information structure */ +struct debuginfo { +}; + +static inline struct debuginfo *debuginfo__new(const char *path __maybe_unused) +{ + return NULL; +} + +static inline void debuginfo__delete(struct debuginfo *dbg __maybe_unused) +{ +} + +static inline int debuginfo__get_text_offset(struct debuginfo *dbg __maybe_unused, + Dwarf_Addr *offs __maybe_unused, + bool adjust_offset __maybe_unused) +{ + return -EINVAL; +} + +#endif /* HAVE_DWARF_SUPPORT */ + +#ifdef HAVE_DEBUGINFOD_SUPPORT +int get_source_from_debuginfod(const char *raw_path, const char *sbuild_id, + char **new_path); +#else /* HAVE_DEBUGINFOD_SUPPORT */ +static inline int 
get_source_from_debuginfod(const char *raw_path __maybe_unused, + const char *sbuild_id __maybe_unused, + char **new_path __maybe_unused) +{ + return -ENOTSUP; +} +#endif /* HAVE_DEBUGINFOD_SUPPORT */ + +#endif /* _PERF_DEBUGINFO_H */ diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 1f629b6fb7cfe3420df429cdf04ba12dc76b8183..22fd5fa806ed8f589ca1209710288c0e44ac19a2 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -31,6 +31,7 @@ #include "debug.h" #include "string2.h" #include "vdso.h" +#include "annotate-data.h" static const char * const debuglink_paths[] = { "%.0s%s", @@ -1327,6 +1328,7 @@ struct dso *dso__new_id(const char *name, struct dso_id *id) dso->data.cache = RB_ROOT; dso->inlined_nodes = RB_ROOT_CACHED; dso->srclines = RB_ROOT_CACHED; + dso->data_types = RB_ROOT; dso->data.fd = -1; dso->data.status = DSO_DATA_STATUS_UNKNOWN; dso->symtab_type = DSO_BINARY_TYPE__NOT_FOUND; @@ -1370,6 +1372,8 @@ void dso__delete(struct dso *dso) symbols__delete(&dso->symbols); dso->symbol_names_len = 0; zfree(&dso->symbol_names); + annotated_data_type__tree_delete(&dso->data_types); + if (dso->short_name_allocated) { zfree((char **)&dso->short_name); dso->short_name_allocated = false; diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 3759de8c2267af674290b3bc5718a97c0e83997d..ce9f3849a773cc49c17b9835a675f895667843eb 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -154,6 +154,8 @@ struct dso { size_t symbol_names_len; struct rb_root_cached inlined_nodes; struct rb_root_cached srclines; + struct rb_root data_types; + struct { u64 addr; struct symbol *symbol; diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 2941d88f2199c42b341cdb9fb741e0dba43cdcbe..7aa5fee0da1906a073ac9423305bf26deb569761 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -1051,32 +1051,28 @@ Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name, } /** - * die_get_typename - Get the 
name of given variable DIE - * @vr_die: a variable DIE + * die_get_typename_from_type - Get the name of given type DIE + * @type_die: a type DIE * @buf: a strbuf for result type name * - * Get the name of @vr_die and stores it to @buf. Return 0 if succeeded. + * Get the name of @type_die and stores it to @buf. Return 0 if succeeded. * and Return -ENOENT if failed to find type name. * Note that the result will stores typedef name if possible, and stores * "*(function_type)" if the type is a function pointer. */ -int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf) +int die_get_typename_from_type(Dwarf_Die *type_die, struct strbuf *buf) { - Dwarf_Die type; int tag, ret; const char *tmp = ""; - if (__die_get_real_type(vr_die, &type) == NULL) - return -ENOENT; - - tag = dwarf_tag(&type); + tag = dwarf_tag(type_die); if (tag == DW_TAG_array_type || tag == DW_TAG_pointer_type) tmp = "*"; else if (tag == DW_TAG_subroutine_type) { /* Function pointer */ return strbuf_add(buf, "(function_type)", 15); } else { - const char *name = dwarf_diename(&type); + const char *name = dwarf_diename(type_die); if (tag == DW_TAG_union_type) tmp = "union "; @@ -1089,8 +1085,35 @@ int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf) /* Write a base name */ return strbuf_addf(buf, "%s%s", tmp, name ?: ""); } - ret = die_get_typename(&type, buf); - return ret ? ret : strbuf_addstr(buf, tmp); + ret = die_get_typename(type_die, buf); + if (ret < 0) { + /* void pointer has no type attribute */ + if (tag == DW_TAG_pointer_type && ret == -ENOENT) + return strbuf_addf(buf, "void*"); + + return ret; + } + return strbuf_addstr(buf, tmp); +} + +/** + * die_get_typename - Get the name of given variable DIE + * @vr_die: a variable DIE + * @buf: a strbuf for result type name + * + * Get the name of @vr_die and stores it to @buf. Return 0 if succeeded. + * and Return -ENOENT if failed to find type name. 
+ * Note that the result will store the typedef name if possible, and stores + * "*(function_type)" if the type is a function pointer. + */ +int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf) +{ + Dwarf_Die type; + + if (__die_get_real_type(vr_die, &type) == NULL) + return -ENOENT; + + return die_get_typename_from_type(&type, buf); } /** @@ -1238,12 +1261,151 @@ int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf) out: return ret; } -#else -int die_get_var_range(Dwarf_Die *sp_die __maybe_unused, - Dwarf_Die *vr_die __maybe_unused, - struct strbuf *buf __maybe_unused) + +/* Internal parameters for __die_find_var_reg_cb() */ +struct find_var_data { + /* Target instruction address */ + Dwarf_Addr pc; + /* Target memory address (for global data) */ + Dwarf_Addr addr; + /* Target register */ + unsigned reg; + /* Access offset, set for global data */ + int offset; +}; + +/* Max number of registers DW_OP_regN supports */ +#define DWARF_OP_DIRECT_REGS 32 + +/* Only checks direct child DIEs in the given scope.
*/ +static int __die_find_var_reg_cb(Dwarf_Die *die_mem, void *arg) +{ + struct find_var_data *data = arg; + int tag = dwarf_tag(die_mem); + ptrdiff_t off = 0; + Dwarf_Attribute attr; + Dwarf_Addr base, start, end; + Dwarf_Op *ops; + size_t nops; + + if (tag != DW_TAG_variable && tag != DW_TAG_formal_parameter) + return DIE_FIND_CB_SIBLING; + + if (dwarf_attr(die_mem, DW_AT_location, &attr) == NULL) + return DIE_FIND_CB_SIBLING; + + while ((off = dwarf_getlocations(&attr, off, &base, &start, &end, &ops, &nops)) > 0) { + /* Assuming the location list is sorted by address */ + if (end < data->pc) + continue; + if (start > data->pc) + break; + + /* Only match with a simple case */ + if (data->reg < DWARF_OP_DIRECT_REGS) { + if (ops->atom == (DW_OP_reg0 + data->reg) && nops == 1) + return DIE_FIND_CB_END; + } else { + if (ops->atom == DW_OP_regx && ops->number == data->reg && + nops == 1) + return DIE_FIND_CB_END; + } + } + return DIE_FIND_CB_SIBLING; +} + +/** + * die_find_variable_by_reg - Find a variable saved in a register + * @sc_die: a scope DIE + * @pc: the program address to find + * @reg: the register number to find + * @die_mem: a buffer to save the resulting DIE + * + * Find the variable DIE accessed by the given register. 
+ */ +Dwarf_Die *die_find_variable_by_reg(Dwarf_Die *sc_die, Dwarf_Addr pc, int reg, + Dwarf_Die *die_mem) +{ + struct find_var_data data = { + .pc = pc, + .reg = reg, + }; + return die_find_child(sc_die, __die_find_var_reg_cb, &data, die_mem); +} + +/* Only checks direct child DIEs in the given scope */ +static int __die_find_var_addr_cb(Dwarf_Die *die_mem, void *arg) +{ + struct find_var_data *data = arg; + int tag = dwarf_tag(die_mem); + ptrdiff_t off = 0; + Dwarf_Attribute attr; + Dwarf_Addr base, start, end; + Dwarf_Word size; + Dwarf_Die type_die; + Dwarf_Op *ops; + size_t nops; + + if (tag != DW_TAG_variable) + return DIE_FIND_CB_SIBLING; + + if (dwarf_attr(die_mem, DW_AT_location, &attr) == NULL) + return DIE_FIND_CB_SIBLING; + + while ((off = dwarf_getlocations(&attr, off, &base, &start, &end, &ops, &nops)) > 0) { + if (ops->atom != DW_OP_addr) + continue; + + if (data->addr < ops->number) + continue; + + if (data->addr == ops->number) { + /* Update offset relative to the start of the variable */ + data->offset = 0; + return DIE_FIND_CB_END; + } + + if (die_get_real_type(die_mem, &type_die) == NULL) + continue; + + if (dwarf_aggregate_size(&type_die, &size) < 0) + continue; + + if (data->addr >= ops->number + size) + continue; + + /* Update offset relative to the start of the variable */ + data->offset = data->addr - ops->number; + return DIE_FIND_CB_END; + } + return DIE_FIND_CB_SIBLING; +} + +/** + * die_find_variable_by_addr - Find variable located at given address + * @sc_die: a scope DIE + * @pc: the program address to find + * @addr: the data address to find + * @die_mem: a buffer to save the resulting DIE + * @offset: the offset in the resulting type + * + * Find the variable DIE located at the given address (in PC-relative mode). + * This is usually for global variables. 
+ */ +Dwarf_Die *die_find_variable_by_addr(Dwarf_Die *sc_die, Dwarf_Addr pc, + Dwarf_Addr addr, Dwarf_Die *die_mem, + int *offset) { - return -ENOTSUP; + struct find_var_data data = { + .pc = pc, + .addr = addr, + }; + Dwarf_Die *result; + + result = die_find_child(sc_die, __die_find_var_addr_cb, &data, die_mem); + if (result) + *offset = data.offset; + return result; } #endif @@ -1425,3 +1587,56 @@ void die_skip_prologue(Dwarf_Die *sp_die, Dwarf_Die *cu_die, *entrypc = postprologue_addr; } + +/* Internal parameters for __die_find_scope_cb() */ +struct find_scope_data { + /* Target instruction address */ + Dwarf_Addr pc; + /* Number of scopes found [output] */ + int nr; + /* Array of scopes found, 0 for the outermost one. [output] */ + Dwarf_Die *scopes; +}; + +static int __die_find_scope_cb(Dwarf_Die *die_mem, void *arg) +{ + struct find_scope_data *data = arg; + + if (dwarf_haspc(die_mem, data->pc)) { + Dwarf_Die *tmp; + + tmp = realloc(data->scopes, (data->nr + 1) * sizeof(*tmp)); + if (tmp == NULL) + return DIE_FIND_CB_END; + + memcpy(tmp + data->nr, die_mem, sizeof(*die_mem)); + data->scopes = tmp; + data->nr++; + return DIE_FIND_CB_CHILD; + } + return DIE_FIND_CB_SIBLING; +} + +/** + * die_get_scopes - Return a list of scopes including the address + * @cu_die: a compile unit DIE + * @pc: the address to find + * @scopes: the array of DIEs for scopes (result) + * + * This function does the same as the dwarf_getscopes() but doesn't follow + * the origins of inlined functions. It returns the number of scopes saved + * in the @scopes argument. The outer scope will be saved first (index 0) and + * the last one is the innermost scope at the @pc. 
+ */ +int die_get_scopes(Dwarf_Die *cu_die, Dwarf_Addr pc, Dwarf_Die **scopes) +{ + struct find_scope_data data = { + .pc = pc, + }; + Dwarf_Die die_mem; + + die_find_child(cu_die, __die_find_scope_cb, &data, &die_mem); + + *scopes = data.scopes; + return data.nr; +} diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h index 7ec8bc1083bb33f81f6d128995552be35e6bb762..4e64caac6df83ea5ba292225894206b450d63222 100644 --- a/tools/perf/util/dwarf-aux.h +++ b/tools/perf/util/dwarf-aux.h @@ -116,12 +116,14 @@ Dwarf_Die *die_find_variable_at(Dwarf_Die *sp_die, const char *name, Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name, Dwarf_Die *die_mem); +/* Get the name of given type DIE */ +int die_get_typename_from_type(Dwarf_Die *type_die, struct strbuf *buf); + /* Get the name of given variable DIE */ int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf); /* Get the name and type of given variable DIE, stored as "type\tname" */ int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf); -int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf); /* Check if target program is compiled with optimization */ bool die_is_optimized_target(Dwarf_Die *cu_die); @@ -130,4 +132,49 @@ bool die_is_optimized_target(Dwarf_Die *cu_die); void die_skip_prologue(Dwarf_Die *sp_die, Dwarf_Die *cu_die, Dwarf_Addr *entrypc); -#endif +/* Get the list of including scopes */ +int die_get_scopes(Dwarf_Die *cu_die, Dwarf_Addr pc, Dwarf_Die **scopes); + +#ifdef HAVE_DWARF_GETLOCATIONS_SUPPORT + +/* Get byte offset range of given variable DIE */ +int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf); + +/* Find a variable saved in the 'reg' at given address */ +Dwarf_Die *die_find_variable_by_reg(Dwarf_Die *sc_die, Dwarf_Addr pc, int reg, + Dwarf_Die *die_mem); + +/* Find a (global) variable located in the 'addr' */ +Dwarf_Die *die_find_variable_by_addr(Dwarf_Die *sc_die, Dwarf_Addr pc, + Dwarf_Addr addr, Dwarf_Die 
*die_mem, + int *offset); + +#else /* HAVE_DWARF_GETLOCATIONS_SUPPORT */ + +static inline int die_get_var_range(Dwarf_Die *sp_die __maybe_unused, + Dwarf_Die *vr_die __maybe_unused, + struct strbuf *buf __maybe_unused) +{ + return -ENOTSUP; +} + +static inline Dwarf_Die *die_find_variable_by_reg(Dwarf_Die *sc_die __maybe_unused, + Dwarf_Addr pc __maybe_unused, + int reg __maybe_unused, + Dwarf_Die *die_mem __maybe_unused) +{ + return NULL; +} + +static inline Dwarf_Die *die_find_variable_by_addr(Dwarf_Die *sc_die __maybe_unused, + Dwarf_Addr pc __maybe_unused, + Dwarf_Addr addr __maybe_unused, + Dwarf_Die *die_mem __maybe_unused, + int *offset __maybe_unused) +{ + return NULL; +} + +#endif /* HAVE_DWARF_GETLOCATIONS_SUPPORT */ + +#endif /* _DWARF_AUX_H */ diff --git a/tools/perf/util/dwarf-regs.c b/tools/perf/util/dwarf-regs.c index 69cfaa5953bf475cf02db129d1dc6ad6aa7332e3..5b7f86c0063f2b279fa5af9ee2f7c6b1b2220996 100644 --- a/tools/perf/util/dwarf-regs.c +++ b/tools/perf/util/dwarf-regs.c @@ -5,9 +5,12 @@ * Written by: Masami Hiramatsu */ +#include +#include #include #include #include +#include #include #ifndef EM_AARCH64 @@ -68,3 +71,34 @@ const char *get_dwarf_regstr(unsigned int n, unsigned int machine) } return NULL; } + +__weak int get_arch_regnum(const char *name __maybe_unused) +{ + return -ENOTSUP; +} + +/* Return DWARF register number from architecture register name */ +int get_dwarf_regnum(const char *name, unsigned int machine) +{ + char *regname = strdup(name); + int reg = -1; + char *p; + + if (regname == NULL) + return -EINVAL; + + /* For convenience, remove trailing characters */ + p = strpbrk(regname, " ,)"); + if (p) + *p = '\0'; + + switch (machine) { + case EM_NONE: /* Generic arch - use host arch */ + reg = get_arch_regnum(regname); + break; + default: + pr_err("ELF MACHINE %x is not supported.\n", machine); + } + free(regname); + return reg; +} diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 
44140b7f596a3f2009fc2f901317a602f14fe6c4..a459374d0a1a1dc89721e210616cfe201f01789d 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -3,6 +3,7 @@ #include "debug.h" #include "env.h" #include "util/header.h" +#include "linux/compiler.h" #include #include #include "cgroup.h" @@ -12,6 +13,7 @@ #include #include "pmus.h" #include "strbuf.h" +#include "trace/beauty/beauty.h" struct perf_env perf_env; @@ -22,13 +24,19 @@ struct perf_env perf_env; void perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info_node *info_node) +{ + down_write(&env->bpf_progs.lock); + __perf_env__insert_bpf_prog_info(env, info_node); + up_write(&env->bpf_progs.lock); +} + +void __perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info_node *info_node) { __u32 prog_id = info_node->info_linear->info.id; struct bpf_prog_info_node *node; struct rb_node *parent = NULL; struct rb_node **p; - down_write(&env->bpf_progs.lock); p = &env->bpf_progs.infos.rb_node; while (*p != NULL) { @@ -40,15 +48,13 @@ void perf_env__insert_bpf_prog_info(struct perf_env *env, p = &(*p)->rb_right; } else { pr_debug("duplicated bpf prog info %u\n", prog_id); - goto out; + return; } } rb_link_node(&info_node->rb_node, parent, p); rb_insert_color(&info_node->rb_node, &env->bpf_progs.infos); env->bpf_progs.infos_cnt++; -out: - up_write(&env->bpf_progs.lock); } struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env, @@ -77,14 +83,22 @@ out: } bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node) +{ + bool ret; + + down_write(&env->bpf_progs.lock); + ret = __perf_env__insert_btf(env, btf_node); + up_write(&env->bpf_progs.lock); + return ret; +} + +bool __perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node) { struct rb_node *parent = NULL; __u32 btf_id = btf_node->id; struct btf_node *node; struct rb_node **p; - bool ret = true; - down_write(&env->bpf_progs.lock); p = &env->bpf_progs.btfs.rb_node; while (*p != NULL) 
{ @@ -96,25 +110,31 @@ bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node) p = &(*p)->rb_right; } else { pr_debug("duplicated btf %u\n", btf_id); - ret = false; - goto out; + return false; } } rb_link_node(&btf_node->rb_node, parent, p); rb_insert_color(&btf_node->rb_node, &env->bpf_progs.btfs); env->bpf_progs.btfs_cnt++; -out: - up_write(&env->bpf_progs.lock); - return ret; + return true; } struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id) +{ + struct btf_node *res; + + down_read(&env->bpf_progs.lock); + res = __perf_env__find_btf(env, btf_id); + up_read(&env->bpf_progs.lock); + return res; +} + +struct btf_node *__perf_env__find_btf(struct perf_env *env, __u32 btf_id) { struct btf_node *node = NULL; struct rb_node *n; - down_read(&env->bpf_progs.lock); n = env->bpf_progs.btfs.rb_node; while (n) { @@ -124,13 +144,9 @@ struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id) else if (btf_id > node->id) n = n->rb_right; else - goto out; + return node; } - node = NULL; - -out: - up_read(&env->bpf_progs.lock); - return node; + return NULL; } /* purge data in bpf_progs.infos tree */ @@ -453,6 +469,18 @@ const char *perf_env__arch(struct perf_env *env) return normalize_arch(arch_name); } +const char *perf_env__arch_strerrno(struct perf_env *env __maybe_unused, int err __maybe_unused) +{ +#if defined(HAVE_SYSCALL_TABLE_SUPPORT) && defined(HAVE_LIBTRACEEVENT) + if (env->arch_strerrno == NULL) + env->arch_strerrno = arch_syscalls__strerrno_function(perf_env__arch(env)); + + return env->arch_strerrno ? env->arch_strerrno(err) : "no arch specific strerrno function"; +#else + return "!(HAVE_SYSCALL_TABLE_SUPPORT && HAVE_LIBTRACEEVENT)"; +#endif +} + const char *perf_env__cpuid(struct perf_env *env) { int status; @@ -531,6 +559,24 @@ int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu) return cpu.cpu >= 0 && cpu.cpu < env->nr_numa_map ? 
env->numa_map[cpu.cpu] : -1; } +bool perf_env__has_pmu_mapping(struct perf_env *env, const char *pmu_name) +{ + char *pmu_mapping = env->pmu_mappings, *colon; + + for (int i = 0; i < env->nr_pmu_mappings; ++i) { + if (strtoul(pmu_mapping, &colon, 0) == ULONG_MAX || *colon != ':') + goto out_error; + + pmu_mapping = colon + 1; + if (strcmp(pmu_mapping, pmu_name) == 0) + return true; + + pmu_mapping += strlen(pmu_mapping) + 1; + } +out_error: + return false; +} + char *perf_env__find_pmu_cap(struct perf_env *env, const char *pmu_name, const char *cap) { diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 4566c51f2fd956ca12ee8a17ef66a3439b0571f4..7c527e65c1864b524c8dfc1d844fac1f6e3ee1a7 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -46,10 +46,17 @@ struct hybrid_node { struct pmu_caps { int nr_caps; unsigned int max_branches; + unsigned int br_cntr_nr; + unsigned int br_cntr_width; + char **caps; char *pmu_name; }; +typedef const char *(arch_syscalls__strerrno_t)(int err); + +arch_syscalls__strerrno_t *arch_syscalls__strerrno_function(const char *arch); + struct perf_env { char *hostname; char *os_release; @@ -62,6 +69,8 @@ struct perf_env { unsigned long long total_mem; unsigned int msr_pmu_type; unsigned int max_branches; + unsigned int br_cntr_nr; + unsigned int br_cntr_width; int kernel_is_64_bit; int nr_cmdline; @@ -130,6 +139,7 @@ struct perf_env { */ bool enabled; } clock; + arch_syscalls__strerrno_t *arch_strerrno; }; enum perf_compress_type { @@ -159,19 +169,26 @@ int perf_env__read_cpu_topology_map(struct perf_env *env); void cpu_cache_level__free(struct cpu_cache_level *cache); const char *perf_env__arch(struct perf_env *env); +const char *perf_env__arch_strerrno(struct perf_env *env, int err); const char *perf_env__cpuid(struct perf_env *env); const char *perf_env__raw_arch(struct perf_env *env); int perf_env__nr_cpus_avail(struct perf_env *env); void perf_env__init(struct perf_env *env); +void 
__perf_env__insert_bpf_prog_info(struct perf_env *env, + struct bpf_prog_info_node *info_node); void perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info_node *info_node); struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env, __u32 prog_id); bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node); +bool __perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node); struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id); +struct btf_node *__perf_env__find_btf(struct perf_env *env, __u32 btf_id); int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu); char *perf_env__find_pmu_cap(struct perf_env *env, const char *pmu_name, const char *cap); + +bool perf_env__has_pmu_mapping(struct perf_env *env, const char *pmu_name); #endif /* __PERF_ENV_H */ diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 923c0fb1512226a60c7a01730c405ca15e6982c9..68f45e9e63b6e4f8fcdf6476dd0b2f9c3789dd3a 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -617,13 +617,13 @@ struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr, if (cpumode == PERF_RECORD_MISC_KERNEL && perf_host) { al->level = 'k'; maps = machine__kernel_maps(machine); - load_map = true; + load_map = !symbol_conf.lazy_load_kernel_maps; } else if (cpumode == PERF_RECORD_MISC_USER && perf_host) { al->level = '.'; } else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL && perf_guest) { al->level = 'g'; maps = machine__kernel_maps(machine); - load_map = true; + load_map = !symbol_conf.lazy_load_kernel_maps; } else if (cpumode == PERF_RECORD_MISC_GUEST_USER && perf_guest) { al->level = 'u'; } else { diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index e36da58522efb3d9639bd6f12c00ff7ecb6e9a8b..95f25e9fb994ab2a5190c40f91e9bbe3d5f884be 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1056,7 +1056,7 @@ int evlist__create_maps(struct evlist 
*evlist, struct target *target) return -1; if (target__uses_dummy_map(target)) - cpus = perf_cpu_map__dummy_new(); + cpus = perf_cpu_map__new_any_cpu(); else cpus = perf_cpu_map__new(target->cpu_list); @@ -1352,7 +1352,7 @@ static int evlist__create_syswide_maps(struct evlist *evlist) * error, and we may not want to do that fallback to a * default cpu identity map :-\ */ - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); if (!cpus) goto out; @@ -2518,3 +2518,41 @@ void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_lis } perf_cpu_map__put(user_requested_cpus); } + +/* + * Rename every hybrid event whose name contains no '/' to + * "PMU_NAME/NAME/MODIFIERS", where MODIFIERS is the text that followed + * the first ':' of the old name (empty if there was none). + * Nothing is done when there is only one core PMU. + */ +void evlist__uniquify_name(struct evlist *evlist) +{ + char *new_name, empty_attributes[2] = ":", *attributes; + struct evsel *pos; + + if (perf_pmus__num_core_pmus() == 1) + return; + + evlist__for_each_entry(evlist, pos) { + if (!evsel__is_hybrid(pos)) + continue; + + if (strchr(pos->name, '/')) + continue; + + attributes = strchr(pos->name, ':'); + if (attributes) + *attributes = '\0'; + else + attributes = empty_attributes; + + /* asprintf() returns -1 on failure and leaves new_name undefined */ + if (asprintf(&new_name, "%s/%s/%s", pos->pmu_name, pos->name, attributes + 1) >= 0) { + free(pos->name); + pos->name = new_name; + } else { + /* Allocation failed: put the ':' back so the old name stays intact */ + *attributes = ':'; + } + } +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 98e7ddb2bd3058106f77271f61f954242bc3984f..cb91dc9117a2726b34b9dce5265186c89eee96cd 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -442,5 +442,6 @@ struct evsel *evlist__find_evsel(struct evlist *evlist, int idx); int evlist__scnprintf_evsels(struct evlist *evlist, size_t size, char *bf); void evlist__check_mem_load_aux(struct evlist *evlist); void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_list); +void evlist__uniquify_name(struct evlist *evlist); #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 72a5dfc38d3806c50ed3c0b933d9a94a56215945..6d7c9c58a9bcb8b7ed70e38286026cac16543163 100644 ---
a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1801,7 +1801,7 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus, if (cpus == NULL) { if (empty_cpu_map == NULL) { - empty_cpu_map = perf_cpu_map__dummy_new(); + empty_cpu_map = perf_cpu_map__new_any_cpu(); if (empty_cpu_map == NULL) return -ENOMEM; } @@ -1832,6 +1832,8 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus, static void evsel__disable_missing_features(struct evsel *evsel) { + if (perf_missing_features.branch_counters) + evsel->core.attr.branch_sample_type &= ~PERF_SAMPLE_BRANCH_COUNTERS; if (perf_missing_features.read_lost) evsel->core.attr.read_format &= ~PERF_FORMAT_LOST; if (perf_missing_features.weight_struct) { @@ -1885,7 +1887,12 @@ bool evsel__detect_missing_features(struct evsel *evsel) * Must probe features in the order they were added to the * perf_event_attr interface. */ - if (!perf_missing_features.read_lost && + if (!perf_missing_features.branch_counters && + (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS)) { + perf_missing_features.branch_counters = true; + pr_debug2("switching off branch counters support\n"); + return true; + } else if (!perf_missing_features.read_lost && (evsel->core.attr.read_format & PERF_FORMAT_LOST)) { perf_missing_features.read_lost = true; pr_debug2("switching off PERF_FORMAT_LOST support\n"); @@ -2318,6 +2325,22 @@ u64 evsel__bitfield_swap_branch_flags(u64 value) return new_val; } +static inline bool evsel__has_branch_counters(const struct evsel *evsel) +{ + struct evsel *cur, *leader = evsel__leader(evsel); + + /* The branch counters feature only supports group */ + if (!leader || !evsel->evlist) + return false; + + evlist__for_each_entry(evsel->evlist, cur) { + if ((leader == evsel__leader(cur)) && + (cur->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS)) + return true; + } + return false; +} + int evsel__parse_sample(struct evsel *evsel, union perf_event 
*event, struct perf_sample *data) { @@ -2551,6 +2574,16 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, OVERFLOW_CHECK(array, sz, max_size); array = (void *)array + sz; + + if (evsel__has_branch_counters(evsel)) { + OVERFLOW_CHECK_u64(array); + + data->branch_stack_cntr = (u64 *)array; + sz = data->branch_stack->nr * sizeof(u64); + + OVERFLOW_CHECK(array, sz, max_size); + array = (void *)array + sz; + } } if (type & PERF_SAMPLE_REGS_USER) { @@ -2820,7 +2853,8 @@ u64 evsel__intval_common(struct evsel *evsel, struct perf_sample *sample, const #endif -bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize) +bool evsel__fallback(struct evsel *evsel, struct target *target, int err, + char *msg, size_t msgsize) { int paranoid; @@ -2828,18 +2862,19 @@ bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize) evsel->core.attr.type == PERF_TYPE_HARDWARE && evsel->core.attr.config == PERF_COUNT_HW_CPU_CYCLES) { /* - * If it's cycles then fall back to hrtimer based - * cpu-clock-tick sw counter, which is always available even if - * no PMU support. + * If it's cycles then fall back to hrtimer based cpu-clock sw + * counter, which is always available even if no PMU support. * * PPC returns ENXIO until 2.6.37 (behavior changed with commit * b0a873e). */ - scnprintf(msg, msgsize, "%s", -"The cycles event is not supported, trying to fall back to cpu-clock-ticks"); - evsel->core.attr.type = PERF_TYPE_SOFTWARE; - evsel->core.attr.config = PERF_COUNT_SW_CPU_CLOCK; + evsel->core.attr.config = target__has_cpu(target) + ? PERF_COUNT_SW_CPU_CLOCK + : PERF_COUNT_SW_TASK_CLOCK; + scnprintf(msg, msgsize, + "The cycles event is not supported, trying to fall back to %s", + target__has_cpu(target) ? 
"cpu-clock" : "task-clock"); zfree(&evsel->name); return true; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index d791316a1792e5931ef5ebaf81215f21104636c8..efbb6e848287f3f6b4f9f0aca779b2a6590ec42f 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -191,6 +191,7 @@ struct perf_missing_features { bool code_page_size; bool weight_struct; bool read_lost; + bool branch_counters; }; extern struct perf_missing_features perf_missing_features; @@ -459,7 +460,8 @@ static inline bool evsel__is_clock(const struct evsel *evsel) evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK); } -bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize); +bool evsel__fallback(struct evsel *evsel, struct target *target, int err, + char *msg, size_t msgsize); int evsel__open_strerror(struct evsel *evsel, struct target *target, int err, char *msg, size_t size); diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c index fefc72066c4e8ee1e85068180a752c12cfb15d23..ac17a3cb59dc0d08621015506b48188ea4a74d03 100644 --- a/tools/perf/util/genelf.c +++ b/tools/perf/util/genelf.c @@ -293,9 +293,9 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym, */ phdr = elf_newphdr(e, 1); phdr[0].p_type = PT_LOAD; - phdr[0].p_offset = 0; - phdr[0].p_vaddr = 0; - phdr[0].p_paddr = 0; + phdr[0].p_offset = GEN_ELF_TEXT_OFFSET; + phdr[0].p_vaddr = GEN_ELF_TEXT_OFFSET; + phdr[0].p_paddr = GEN_ELF_TEXT_OFFSET; phdr[0].p_filesz = csize; phdr[0].p_memsz = csize; phdr[0].p_flags = PF_X | PF_R; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index e86b9439ffee054a4088efc944b1b4b657f8d330..3fe28edc3d017a39127abdf64c8289cc995cda35 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1444,7 +1444,9 @@ static int build_mem_topology(struct memory_node **nodesp, u64 *cntp) nodes = new_nodes; size += 4; } - ret = memory_node__read(&nodes[cnt++], idx); + ret = memory_node__read(&nodes[cnt], idx); + if (!ret) + cnt += 1; } out: 
closedir(dir); @@ -1847,8 +1849,8 @@ static void print_bpf_prog_info(struct feat_fd *ff, FILE *fp) node = rb_entry(next, struct bpf_prog_info_node, rb_node); next = rb_next(&node->rb_node); - bpf_event__print_bpf_prog_info(&node->info_linear->info, - env, fp); + __bpf_event__print_bpf_prog_info(&node->info_linear->info, + env, fp); } up_read(&env->bpf_progs.lock); @@ -2145,6 +2147,14 @@ static void print_pmu_caps(struct feat_fd *ff, FILE *fp) __print_pmu_caps(fp, pmu_caps->nr_caps, pmu_caps->caps, pmu_caps->pmu_name); } + + if (strcmp(perf_env__arch(&ff->ph->env), "x86") == 0 && + perf_env__has_pmu_mapping(&ff->ph->env, "ibs_op")) { + char *max_precise = perf_env__find_pmu_cap(&ff->ph->env, "cpu", "max_precise"); + + if (max_precise != NULL && atoi(max_precise) == 0) + fprintf(fp, "# AMD systems uses ibs_op// PMU for some precise events, e.g.: cycles:p, see the 'perf list' man page for further details.\n"); + } } static void print_pmu_mappings(struct feat_fd *ff, FILE *fp) @@ -3178,7 +3188,7 @@ static int process_bpf_prog_info(struct feat_fd *ff, void *data __maybe_unused) /* after reading from file, translate offset to address */ bpil_offs_to_addr(info_linear); info_node->info_linear = info_linear; - perf_env__insert_bpf_prog_info(env, info_node); + __perf_env__insert_bpf_prog_info(env, info_node); } up_write(&env->bpf_progs.lock); @@ -3225,7 +3235,7 @@ static int process_bpf_btf(struct feat_fd *ff, void *data __maybe_unused) if (__do_read(ff, node->data, data_size)) goto out; - perf_env__insert_btf(env, node); + __perf_env__insert_btf(env, node); node = NULL; } @@ -3259,7 +3269,9 @@ static int process_compressed(struct feat_fd *ff, } static int __process_pmu_caps(struct feat_fd *ff, int *nr_caps, - char ***caps, unsigned int *max_branches) + char ***caps, unsigned int *max_branches, + unsigned int *br_cntr_nr, + unsigned int *br_cntr_width) { char *name, *value, *ptr; u32 nr_pmu_caps, i; @@ -3294,6 +3306,12 @@ static int __process_pmu_caps(struct feat_fd *ff, int 
*nr_caps, if (!strcmp(name, "branches")) *max_branches = atoi(value); + if (!strcmp(name, "branch_counter_nr")) + *br_cntr_nr = atoi(value); + + if (!strcmp(name, "branch_counter_width")) + *br_cntr_width = atoi(value); + free(value); free(name); } @@ -3318,7 +3336,9 @@ static int process_cpu_pmu_caps(struct feat_fd *ff, { int ret = __process_pmu_caps(ff, &ff->ph->env.nr_cpu_pmu_caps, &ff->ph->env.cpu_pmu_caps, - &ff->ph->env.max_branches); + &ff->ph->env.max_branches, + &ff->ph->env.br_cntr_nr, + &ff->ph->env.br_cntr_width); if (!ret && !ff->ph->env.cpu_pmu_caps) pr_debug("cpu pmu capabilities not available\n"); @@ -3347,7 +3367,9 @@ static int process_pmu_caps(struct feat_fd *ff, void *data __maybe_unused) for (i = 0; i < nr_pmu; i++) { ret = __process_pmu_caps(ff, &pmu_caps[i].nr_caps, &pmu_caps[i].caps, - &pmu_caps[i].max_branches); + &pmu_caps[i].max_branches, + &pmu_caps[i].br_cntr_nr, + &pmu_caps[i].br_cntr_width); if (ret) goto err; @@ -4369,9 +4391,10 @@ size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp) ret += fprintf(fp, "... 
"); map = cpu_map__new_data(&ev->cpus.cpus); - if (map) + if (map) { ret += cpu_map__fprintf(map, fp); - else + perf_cpu_map__put(map); + } else ret += fprintf(fp, "failed to get cpus\n"); break; default: diff --git a/tools/perf/util/hisi-ptt.c b/tools/perf/util/hisi-ptt.c index 43bd1ca62d58244583f8c8f742d890c79b6e7b36..52d0ce302ca042ed0bae720273443d3f2794affb 100644 --- a/tools/perf/util/hisi-ptt.c +++ b/tools/perf/util/hisi-ptt.c @@ -123,6 +123,7 @@ static int hisi_ptt_process_auxtrace_event(struct perf_session *session, if (dump_trace) hisi_ptt_dump_event(ptt, data, size); + free(data); return 0; } diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index afc9f1c7f4dc248cbc1b70104a0405d42264fdca..4a0aea0c9e00e09b64520df0948460493acb55b5 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -82,6 +82,9 @@ enum hist_column { HISTC_ADDR_TO, HISTC_ADDR, HISTC_SIMD, + HISTC_TYPE, + HISTC_TYPE_OFFSET, + HISTC_SYMBOL_OFFSET, HISTC_NR_COLS, /* Last entry */ }; @@ -457,7 +460,6 @@ struct hist_browser_timer { int refresh; }; -struct annotation_options; struct res_sample; enum rstype { @@ -473,16 +475,13 @@ struct block_hist; void attr_to_script(char *buf, struct perf_event_attr *attr); int map_symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, - struct hist_browser_timer *hbt, - struct annotation_options *annotation_opts); + struct hist_browser_timer *hbt); int hist_entry__tui_annotate(struct hist_entry *he, struct evsel *evsel, - struct hist_browser_timer *hbt, - struct annotation_options *annotation_opts); + struct hist_browser_timer *hbt); int evlist__tui_browse_hists(struct evlist *evlist, const char *help, struct hist_browser_timer *hbt, - float min_pcnt, struct perf_env *env, bool warn_lost_event, - struct annotation_options *annotation_options); + float min_pcnt, struct perf_env *env, bool warn_lost_event); int script_browse(const char *script_opt, struct evsel *evsel); @@ -492,8 +491,7 @@ int res_sample_browse(struct res_sample 
*res_samples, int num_res, void res_sample_init(void); int block_hists_tui_browse(struct block_hist *bh, struct evsel *evsel, - float min_percent, struct perf_env *env, - struct annotation_options *annotation_opts); + float min_percent, struct perf_env *env); #else static inline int evlist__tui_browse_hists(struct evlist *evlist __maybe_unused, @@ -501,23 +499,20 @@ int evlist__tui_browse_hists(struct evlist *evlist __maybe_unused, struct hist_browser_timer *hbt __maybe_unused, float min_pcnt __maybe_unused, struct perf_env *env __maybe_unused, - bool warn_lost_event __maybe_unused, - struct annotation_options *annotation_options __maybe_unused) + bool warn_lost_event __maybe_unused) { return 0; } static inline int map_symbol__tui_annotate(struct map_symbol *ms __maybe_unused, struct evsel *evsel __maybe_unused, - struct hist_browser_timer *hbt __maybe_unused, - struct annotation_options *annotation_options __maybe_unused) + struct hist_browser_timer *hbt __maybe_unused) { return 0; } static inline int hist_entry__tui_annotate(struct hist_entry *he __maybe_unused, struct evsel *evsel __maybe_unused, - struct hist_browser_timer *hbt __maybe_unused, - struct annotation_options *annotation_opts __maybe_unused) + struct hist_browser_timer *hbt __maybe_unused) { return 0; } @@ -541,8 +536,7 @@ static inline void res_sample_init(void) {} static inline int block_hists_tui_browse(struct block_hist *bh __maybe_unused, struct evsel *evsel __maybe_unused, float min_percent __maybe_unused, - struct perf_env *env __maybe_unused, - struct annotation_options *annotation_opts __maybe_unused) + struct perf_env *env __maybe_unused) { return 0; } diff --git a/tools/perf/util/include/dwarf-regs.h b/tools/perf/util/include/dwarf-regs.h index 7d99a084e82d7c1047d0911365cb3828e6c3ab60..01fb25a1150af8d9af9c1fd222d4aec89a534a4e 100644 --- a/tools/perf/util/include/dwarf-regs.h +++ b/tools/perf/util/include/dwarf-regs.h @@ -2,6 +2,9 @@ #ifndef _PERF_DWARF_REGS_H_ #define _PERF_DWARF_REGS_H_ 
+#define DWARF_REG_PC 0xd3af9c /* random number */ +#define DWARF_REG_FB 0xd3affb /* random number */ + #ifdef HAVE_DWARF_SUPPORT const char *get_arch_regstr(unsigned int n); /* @@ -10,6 +13,22 @@ const char *get_arch_regstr(unsigned int n); * machine: ELF machine signature (EM_*) */ const char *get_dwarf_regstr(unsigned int n, unsigned int machine); + +int get_arch_regnum(const char *name); +/* + * get_dwarf_regnum - Returns DWARF regnum from register name + * name: architecture register name + * machine: ELF machine signature (EM_*) + */ +int get_dwarf_regnum(const char *name, unsigned int machine); + +#else /* HAVE_DWARF_SUPPORT */ + +static inline int get_dwarf_regnum(const char *name __maybe_unused, + unsigned int machine __maybe_unused) +{ + return -1; +} #endif #ifdef HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 90c750150b19bdbc0192c5bdc0fd306a2554aeca..b397a769006f45ac1b7716f4efdb2147c86977ae 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -453,7 +453,7 @@ static struct thread *findnew_guest_code(struct machine *machine, * Guest code can be found in hypervisor process at the same address * so copy host maps. 
*/ - err = maps__clone(thread, thread__maps(host_thread)); + err = maps__copy_from(thread__maps(thread), thread__maps(host_thread)); thread__put(host_thread); if (err) goto out_err; @@ -1285,33 +1285,46 @@ static u64 find_entry_trampoline(struct dso *dso) #define X86_64_CPU_ENTRY_AREA_SIZE 0x2c000 #define X86_64_ENTRY_TRAMPOLINE 0x6000 +struct machine__map_x86_64_entry_trampolines_args { + struct maps *kmaps; + bool found; +}; + +static int machine__map_x86_64_entry_trampolines_cb(struct map *map, void *data) +{ + struct machine__map_x86_64_entry_trampolines_args *args = data; + struct map *dest_map; + struct kmap *kmap = __map__kmap(map); + + if (!kmap || !is_entry_trampoline(kmap->name)) + return 0; + + dest_map = maps__find(args->kmaps, map__pgoff(map)); + if (dest_map != map) + map__set_pgoff(map, map__map_ip(dest_map, map__pgoff(map))); + + args->found = true; + return 0; +} + /* Map x86_64 PTI entry trampolines */ int machine__map_x86_64_entry_trampolines(struct machine *machine, struct dso *kernel) { - struct maps *kmaps = machine__kernel_maps(machine); + struct machine__map_x86_64_entry_trampolines_args args = { + .kmaps = machine__kernel_maps(machine), + .found = false, + }; int nr_cpus_avail, cpu; - bool found = false; - struct map_rb_node *rb_node; u64 pgoff; /* * In the vmlinux case, pgoff is a virtual address which must now be * mapped to a vmlinux offset. 
*/ - maps__for_each_entry(kmaps, rb_node) { - struct map *dest_map, *map = rb_node->map; - struct kmap *kmap = __map__kmap(map); - - if (!kmap || !is_entry_trampoline(kmap->name)) - continue; + maps__for_each_map(args.kmaps, machine__map_x86_64_entry_trampolines_cb, &args); - dest_map = maps__find(kmaps, map__pgoff(map)); - if (dest_map != map) - map__set_pgoff(map, map__map_ip(dest_map, map__pgoff(map))); - found = true; - } - if (found || machine->trampolines_mapped) + if (args.found || machine->trampolines_mapped) return 0; pgoff = find_entry_trampoline(kernel); @@ -1359,8 +1372,7 @@ __machine__create_kernel_maps(struct machine *machine, struct dso *kernel) if (machine->vmlinux_map == NULL) return -ENOMEM; - map__set_map_ip(machine->vmlinux_map, identity__map_ip); - map__set_unmap_ip(machine->vmlinux_map, identity__map_ip); + map__set_mapping_type(machine->vmlinux_map, MAPPING_TYPE__IDENTITY); return maps__insert(machine__kernel_maps(machine), machine->vmlinux_map); } @@ -1750,12 +1762,11 @@ int machine__create_kernel_maps(struct machine *machine) if (end == ~0ULL) { /* update end address of the kernel map using adjacent module address */ - struct map_rb_node *rb_node = maps__find_node(machine__kernel_maps(machine), - machine__kernel_map(machine)); - struct map_rb_node *next = map_rb_node__next(rb_node); + struct map *next = maps__find_next_entry(machine__kernel_maps(machine), + machine__kernel_map(machine)); if (next) - machine__set_kernel_mmap(machine, start, map__start(next->map)); + machine__set_kernel_mmap(machine, start, map__start(next)); } out_put: @@ -2157,9 +2168,13 @@ int machine__process_exit_event(struct machine *machine, union perf_event *event if (dump_trace) perf_event__fprintf_task(event, stdout); - if (thread != NULL) - thread__put(thread); - + if (thread != NULL) { + if (symbol_conf.keep_exited_threads) + thread__set_exited(thread, /*exited=*/true); + else + machine__remove_thread(machine, thread); + } + thread__put(thread); return 0; } @@ 
-3395,16 +3410,8 @@ int machine__for_each_dso(struct machine *machine, machine__dso_t fn, void *priv int machine__for_each_kernel_map(struct machine *machine, machine__map_t fn, void *priv) { struct maps *maps = machine__kernel_maps(machine); - struct map_rb_node *pos; - int err = 0; - maps__for_each_entry(maps, pos) { - err = fn(pos->map, priv); - if (err != 0) { - break; - } - } - return err; + return maps__for_each_map(maps, fn, priv); } bool machine__is_lock_function(struct machine *machine, u64 addr) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index f64b830044217d520f5a2ec34f85e3bf8bbbd44f..54c67cb7ecefa441608e383476c6953563272f5a 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -109,8 +109,7 @@ void map__init(struct map *map, u64 start, u64 end, u64 pgoff, struct dso *dso) map__set_pgoff(map, pgoff); map__set_reloc(map, 0); map__set_dso(map, dso__get(dso)); - map__set_map_ip(map, map__dso_map_ip); - map__set_unmap_ip(map, map__dso_unmap_ip); + map__set_mapping_type(map, MAPPING_TYPE__DSO); map__set_erange_warned(map, false); refcount_set(map__refcnt(map), 1); } @@ -172,7 +171,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, map__init(result, start, start + len, pgoff, dso); if (anon || no_dso) { - map->map_ip = map->unmap_ip = identity__map_ip; + map->mapping_type = MAPPING_TYPE__IDENTITY; /* * Set memory without DSO as loaded. 
All map__find_* @@ -630,18 +629,3 @@ struct maps *map__kmaps(struct map *map) } return kmap->kmaps; } - -u64 map__dso_map_ip(const struct map *map, u64 ip) -{ - return ip - map__start(map) + map__pgoff(map); -} - -u64 map__dso_unmap_ip(const struct map *map, u64 ip) -{ - return ip + map__start(map) - map__pgoff(map); -} - -u64 identity__map_ip(const struct map *map __maybe_unused, u64 ip) -{ - return ip; -} diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 1b53d53adc866eacecabf0f95035343f5434338c..49756716cb132790f3cb9d9c2511fc73c9a294ba 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -16,23 +16,25 @@ struct dso; struct maps; struct machine; +enum mapping_type { + /* map__map_ip/map__unmap_ip are given as offsets in the DSO. */ + MAPPING_TYPE__DSO, + /* map__map_ip/map__unmap_ip are just the given ip value. */ + MAPPING_TYPE__IDENTITY, +}; + DECLARE_RC_STRUCT(map) { u64 start; u64 end; - bool erange_warned:1; - bool priv:1; - u32 prot; u64 pgoff; u64 reloc; - - /* ip -> dso rip */ - u64 (*map_ip)(const struct map *, u64); - /* dso rip -> ip */ - u64 (*unmap_ip)(const struct map *, u64); - struct dso *dso; refcount_t refcnt; + u32 prot; u32 flags; + enum mapping_type mapping_type:8; + bool erange_warned; + bool priv; }; struct kmap; @@ -41,38 +43,11 @@ struct kmap *__map__kmap(struct map *map); struct kmap *map__kmap(struct map *map); struct maps *map__kmaps(struct map *map); -/* ip -> dso rip */ -u64 map__dso_map_ip(const struct map *map, u64 ip); -/* dso rip -> ip */ -u64 map__dso_unmap_ip(const struct map *map, u64 ip); -/* Returns ip */ -u64 identity__map_ip(const struct map *map __maybe_unused, u64 ip); - static inline struct dso *map__dso(const struct map *map) { return RC_CHK_ACCESS(map)->dso; } -static inline u64 map__map_ip(const struct map *map, u64 ip) -{ - return RC_CHK_ACCESS(map)->map_ip(map, ip); -} - -static inline u64 map__unmap_ip(const struct map *map, u64 ip) -{ - return RC_CHK_ACCESS(map)->unmap_ip(map, ip); -} - 
-static inline void *map__map_ip_ptr(struct map *map) -{ - return RC_CHK_ACCESS(map)->map_ip; -} - -static inline void* map__unmap_ip_ptr(struct map *map) -{ - return RC_CHK_ACCESS(map)->unmap_ip; -} - static inline u64 map__start(const struct map *map) { return RC_CHK_ACCESS(map)->start; @@ -123,6 +98,34 @@ static inline size_t map__size(const struct map *map) return map__end(map) - map__start(map); } +/* ip -> dso rip */ +static inline u64 map__dso_map_ip(const struct map *map, u64 ip) +{ + return ip - map__start(map) + map__pgoff(map); +} + +/* dso rip -> ip */ +static inline u64 map__dso_unmap_ip(const struct map *map, u64 rip) +{ + return rip + map__start(map) - map__pgoff(map); +} + +static inline u64 map__map_ip(const struct map *map, u64 ip_or_rip) +{ + if ((RC_CHK_ACCESS(map)->mapping_type) == MAPPING_TYPE__DSO) + return map__dso_map_ip(map, ip_or_rip); + else + return ip_or_rip; +} + +static inline u64 map__unmap_ip(const struct map *map, u64 ip_or_rip) +{ + if ((RC_CHK_ACCESS(map)->mapping_type) == MAPPING_TYPE__DSO) + return map__dso_unmap_ip(map, ip_or_rip); + else + return ip_or_rip; +} + /* rip/ip <-> addr suitable for passing to `objdump --start-address=` */ u64 map__rip_2objdump(struct map *map, u64 rip); @@ -294,13 +297,13 @@ static inline void map__set_dso(struct map *map, struct dso *dso) RC_CHK_ACCESS(map)->dso = dso; } -static inline void map__set_map_ip(struct map *map, u64 (*map_ip)(const struct map *map, u64 ip)) +static inline void map__set_mapping_type(struct map *map, enum mapping_type type) { - RC_CHK_ACCESS(map)->map_ip = map_ip; + RC_CHK_ACCESS(map)->mapping_type = type; } -static inline void map__set_unmap_ip(struct map *map, u64 (*unmap_ip)(const struct map *map, u64 rip)) +static inline enum mapping_type map__mapping_type(struct map *map) { - RC_CHK_ACCESS(map)->unmap_ip = unmap_ip; + return RC_CHK_ACCESS(map)->mapping_type; } #endif /* __PERF_MAP_H */ diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c index 
233438c95b531f7ac6b571723a9e39f7b4000019..0334fc18d9c65897c5e76111d8cb3a6f8a4a53ba 100644 --- a/tools/perf/util/maps.c +++ b/tools/perf/util/maps.c @@ -10,6 +10,68 @@ #include "ui/ui.h" #include "unwind.h" +struct map_rb_node { + struct rb_node rb_node; + struct map *map; +}; + +#define maps__for_each_entry(maps, map) \ + for (map = maps__first(maps); map; map = map_rb_node__next(map)) + +#define maps__for_each_entry_safe(maps, map, next) \ + for (map = maps__first(maps), next = map_rb_node__next(map); map; \ + map = next, next = map_rb_node__next(map)) + +static struct rb_root *maps__entries(struct maps *maps) +{ + return &RC_CHK_ACCESS(maps)->entries; +} + +static struct rw_semaphore *maps__lock(struct maps *maps) +{ + return &RC_CHK_ACCESS(maps)->lock; +} + +static struct map **maps__maps_by_name(struct maps *maps) +{ + return RC_CHK_ACCESS(maps)->maps_by_name; +} + +static struct map_rb_node *maps__first(struct maps *maps) +{ + struct rb_node *first = rb_first(maps__entries(maps)); + + if (first) + return rb_entry(first, struct map_rb_node, rb_node); + return NULL; +} + +static struct map_rb_node *map_rb_node__next(struct map_rb_node *node) +{ + struct rb_node *next; + + if (!node) + return NULL; + + next = rb_next(&node->rb_node); + + if (!next) + return NULL; + + return rb_entry(next, struct map_rb_node, rb_node); +} + +static struct map_rb_node *maps__find_node(struct maps *maps, struct map *map) +{ + struct map_rb_node *rb_node; + + maps__for_each_entry(maps, rb_node) { + if (rb_node->RC_CHK_ACCESS(map) == RC_CHK_ACCESS(map)) + return rb_node; + } + return NULL; +} + static void maps__init(struct maps *maps, struct machine *machine) { refcount_set(maps__refcnt(maps), 1); @@ -196,6 +258,41 @@ void maps__put(struct maps *maps) RC_CHK_PUT(maps); } +int maps__for_each_map(struct maps *maps, int (*cb)(struct map *map, void *data), void *data) +{ + struct map_rb_node *pos; + int ret = 0; + + down_read(maps__lock(maps)); + maps__for_each_entry(maps, pos) { + ret = 
cb(pos->map, data); + if (ret) + break; + } + up_read(maps__lock(maps)); + return ret; +} + +void maps__remove_maps(struct maps *maps, bool (*cb)(struct map *map, void *data), void *data) +{ + struct map_rb_node *pos, *next; + unsigned int start_nr_maps; + + down_write(maps__lock(maps)); + + start_nr_maps = maps__nr_maps(maps); + maps__for_each_entry_safe(maps, pos, next) { + if (cb(pos->map, data)) { + __maps__remove(maps, pos); + --RC_CHK_ACCESS(maps)->nr_maps; + } + } + if (maps__maps_by_name(maps) && start_nr_maps != maps__nr_maps(maps)) + __maps__free_maps_by_name(maps); + + up_write(maps__lock(maps)); +} + struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp) { struct map *map = maps__find(maps, addr); @@ -210,31 +307,40 @@ struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp) return NULL; } -struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp) -{ +struct maps__find_symbol_by_name_args { + struct map **mapp; + const char *name; struct symbol *sym; - struct map_rb_node *pos; +}; - down_read(maps__lock(maps)); +static int maps__find_symbol_by_name_cb(struct map *map, void *data) +{ + struct maps__find_symbol_by_name_args *args = data; - maps__for_each_entry(maps, pos) { - sym = map__find_symbol_by_name(pos->map, name); + args->sym = map__find_symbol_by_name(map, args->name); + if (!args->sym) + return 0; - if (sym == NULL) - continue; - if (!map__contains_symbol(pos->map, sym)) { - sym = NULL; - continue; - } - if (mapp != NULL) - *mapp = pos->map; - goto out; + if (!map__contains_symbol(map, args->sym)) { + args->sym = NULL; + return 0; } - sym = NULL; -out: - up_read(maps__lock(maps)); - return sym; + if (args->mapp != NULL) + *args->mapp = map__get(map); + return 1; +} + +struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp) +{ + struct maps__find_symbol_by_name_args args = { + .mapp = mapp, + .name = name, + .sym = 
NULL, + }; + + maps__for_each_map(maps, maps__find_symbol_by_name_cb, &args); + return args.sym; } int maps__find_ams(struct maps *maps, struct addr_map_symbol *ams) @@ -253,41 +359,46 @@ int maps__find_ams(struct maps *maps, struct addr_map_symbol *ams) return ams->ms.sym ? 0 : -1; } -size_t maps__fprintf(struct maps *maps, FILE *fp) -{ - size_t printed = 0; - struct map_rb_node *pos; +struct maps__fprintf_args { + FILE *fp; + size_t printed; +}; - down_read(maps__lock(maps)); +static int maps__fprintf_cb(struct map *map, void *data) +{ + struct maps__fprintf_args *args = data; - maps__for_each_entry(maps, pos) { - printed += fprintf(fp, "Map:"); - printed += map__fprintf(pos->map, fp); - if (verbose > 2) { - printed += dso__fprintf(map__dso(pos->map), fp); - printed += fprintf(fp, "--\n"); - } + args->printed += fprintf(args->fp, "Map:"); + args->printed += map__fprintf(map, args->fp); + if (verbose > 2) { + args->printed += dso__fprintf(map__dso(map), args->fp); + args->printed += fprintf(args->fp, "--\n"); } + return 0; +} - up_read(maps__lock(maps)); +size_t maps__fprintf(struct maps *maps, FILE *fp) +{ + struct maps__fprintf_args args = { + .fp = fp, + .printed = 0, + }; + + maps__for_each_map(maps, maps__fprintf_cb, &args); - return printed; + return args.printed; } -int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp) +/* + * Find first map where end > map->start. + * Same as find_vma() in kernel. + */ +static struct rb_node *first_ending_after(struct maps *maps, const struct map *map) { struct rb_root *root; struct rb_node *next, *first; - int err = 0; - - down_write(maps__lock(maps)); root = maps__entries(maps); - - /* - * Find first map where end > map->start. - * Same as find_vma() in kernel. 
- */ next = root->rb_node; first = NULL; while (next) { @@ -301,8 +412,23 @@ int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp) } else next = next->rb_right; } + return first; +} - next = first; +/* + * Adds new to maps, if new overlaps existing entries then the existing maps are + * adjusted or removed so that new fits without overlapping any entries. + */ +int maps__fixup_overlap_and_insert(struct maps *maps, struct map *new) +{ + + struct rb_node *next; + int err = 0; + FILE *fp = debug_file(); + + down_write(maps__lock(maps)); + + next = first_ending_after(maps, new); while (next && !err) { struct map_rb_node *pos = rb_entry(next, struct map_rb_node, rb_node); next = rb_next(&pos->rb_node); @@ -311,27 +437,27 @@ int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp) * Stop if current map starts after map->end. * Maps are ordered by start: next will not overlap for sure. */ - if (map__start(pos->map) >= map__end(map)) + if (map__start(pos->map) >= map__end(new)) break; if (verbose >= 2) { if (use_browser) { pr_debug("overlapping maps in %s (disable tui for more info)\n", - map__dso(map)->name); + map__dso(new)->name); } else { - fputs("overlapping maps:\n", fp); - map__fprintf(map, fp); + pr_debug("overlapping maps:\n"); + map__fprintf(new, fp); map__fprintf(pos->map, fp); } } - rb_erase_init(&pos->rb_node, root); + rb_erase_init(&pos->rb_node, maps__entries(maps)); /* * Now check if we need to create new maps for areas not * overlapped by the new map: */ - if (map__start(map) > map__start(pos->map)) { + if (map__start(new) > map__start(pos->map)) { struct map *before = map__clone(pos->map); if (before == NULL) { @@ -339,7 +465,7 @@ int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp) goto put_map; } - map__set_end(before, map__start(map)); + map__set_end(before, map__start(new)); err = __maps__insert(maps, before); if (err) { map__put(before); @@ -351,7 +477,7 @@ int 
maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp) map__put(before); } - if (map__end(map) < map__end(pos->map)) { + if (map__end(new) < map__end(pos->map)) { struct map *after = map__clone(pos->map); if (after == NULL) { @@ -359,10 +485,10 @@ int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp) goto put_map; } - map__set_start(after, map__end(map)); - map__add_pgoff(after, map__end(map) - map__start(pos->map)); - assert(map__map_ip(pos->map, map__end(map)) == - map__map_ip(after, map__end(map))); + map__set_start(after, map__end(new)); + map__add_pgoff(after, map__end(new) - map__start(pos->map)); + assert(map__map_ip(pos->map, map__end(new)) == + map__map_ip(after, map__end(new))); err = __maps__insert(maps, after); if (err) { map__put(after); @@ -376,16 +502,14 @@ put_map: map__put(pos->map); free(pos); } + /* Add the map. */ + err = __maps__insert(maps, new); up_write(maps__lock(maps)); return err; } -/* - * XXX This should not really _copy_ te maps, but refcount them. - */ -int maps__clone(struct thread *thread, struct maps *parent) +int maps__copy_from(struct maps *maps, struct maps *parent) { - struct maps *maps = thread__maps(thread); int err; struct map_rb_node *rb_node; @@ -416,17 +540,6 @@ out_unlock: return err; } -struct map_rb_node *maps__find_node(struct maps *maps, struct map *map) -{ - struct map_rb_node *rb_node; - - maps__for_each_entry(maps, rb_node) { - if (rb_node->RC_CHK_ACCESS(map) == RC_CHK_ACCESS(map)) - return rb_node; - } - return NULL; -} - struct map *maps__find(struct maps *maps, u64 ip) { struct rb_node *p; @@ -452,26 +565,275 @@ out: return m ? 
m->map : NULL; } -struct map_rb_node *maps__first(struct maps *maps) +static int map__strcmp(const void *a, const void *b) { - struct rb_node *first = rb_first(maps__entries(maps)); + const struct map *map_a = *(const struct map **)a; + const struct map *map_b = *(const struct map **)b; + const struct dso *dso_a = map__dso(map_a); + const struct dso *dso_b = map__dso(map_b); + int ret = strcmp(dso_a->short_name, dso_b->short_name); - if (first) - return rb_entry(first, struct map_rb_node, rb_node); - return NULL; + if (ret == 0 && map_a != map_b) { + /* + * Ensure distinct but name equal maps have an order in part to + * aid reference counting. + */ + ret = (int)map__start(map_a) - (int)map__start(map_b); + if (ret == 0) + ret = (int)((intptr_t)map_a - (intptr_t)map_b); + } + + return ret; } -struct map_rb_node *map_rb_node__next(struct map_rb_node *node) +static int map__strcmp_name(const void *name, const void *b) { - struct rb_node *next; + const struct dso *dso = map__dso(*(const struct map **)b); - if (!node) - return NULL; + return strcmp(name, dso->short_name); +} - next = rb_next(&node->rb_node); +void __maps__sort_by_name(struct maps *maps) +{ + qsort(maps__maps_by_name(maps), maps__nr_maps(maps), sizeof(struct map *), map__strcmp); +} - if (!next) +static int map__groups__sort_by_name_from_rbtree(struct maps *maps) +{ + struct map_rb_node *rb_node; + struct map **maps_by_name = realloc(maps__maps_by_name(maps), + maps__nr_maps(maps) * sizeof(struct map *)); + int i = 0; + + if (maps_by_name == NULL) + return -1; + + up_read(maps__lock(maps)); + down_write(maps__lock(maps)); + + RC_CHK_ACCESS(maps)->maps_by_name = maps_by_name; + RC_CHK_ACCESS(maps)->nr_maps_allocated = maps__nr_maps(maps); + + maps__for_each_entry(maps, rb_node) + maps_by_name[i++] = map__get(rb_node->map); + + __maps__sort_by_name(maps); + + up_write(maps__lock(maps)); + down_read(maps__lock(maps)); + + return 0; +} + +static struct map *__maps__find_by_name(struct maps *maps, const char 
*name) +{ + struct map **mapp; + + if (maps__maps_by_name(maps) == NULL && + map__groups__sort_by_name_from_rbtree(maps)) return NULL; - return rb_entry(next, struct map_rb_node, rb_node); + mapp = bsearch(name, maps__maps_by_name(maps), maps__nr_maps(maps), + sizeof(*mapp), map__strcmp_name); + if (mapp) + return *mapp; + return NULL; +} + +struct map *maps__find_by_name(struct maps *maps, const char *name) +{ + struct map_rb_node *rb_node; + struct map *map; + + down_read(maps__lock(maps)); + + + if (RC_CHK_ACCESS(maps)->last_search_by_name) { + const struct dso *dso = map__dso(RC_CHK_ACCESS(maps)->last_search_by_name); + + if (strcmp(dso->short_name, name) == 0) { + map = RC_CHK_ACCESS(maps)->last_search_by_name; + goto out_unlock; + } + } + /* + * If we have maps->maps_by_name, then the name isn't in the rbtree, + * as maps->maps_by_name mirrors the rbtree when lookups by name are + * made. + */ + map = __maps__find_by_name(maps, name); + if (map || maps__maps_by_name(maps) != NULL) + goto out_unlock; + + /* Fallback to traversing the rbtree... 
*/ + maps__for_each_entry(maps, rb_node) { + struct dso *dso; + + map = rb_node->map; + dso = map__dso(map); + if (strcmp(dso->short_name, name) == 0) { + RC_CHK_ACCESS(maps)->last_search_by_name = map; + goto out_unlock; + } + } + map = NULL; + +out_unlock: + up_read(maps__lock(maps)); + return map; +} + +struct map *maps__find_next_entry(struct maps *maps, struct map *map) +{ + struct map_rb_node *rb_node = maps__find_node(maps, map); + struct map_rb_node *next = map_rb_node__next(rb_node); + + if (next) + return next->map; + + return NULL; +} + +void maps__fixup_end(struct maps *maps) +{ + struct map_rb_node *prev = NULL, *curr; + + down_write(maps__lock(maps)); + + maps__for_each_entry(maps, curr) { + if (prev && (!map__end(prev->map) || map__end(prev->map) > map__start(curr->map))) + map__set_end(prev->map, map__start(curr->map)); + + prev = curr; + } + + /* + * We still haven't the actual symbols, so guess the + * last map final address. + */ + if (curr && !map__end(curr->map)) + map__set_end(curr->map, ~0ULL); + + up_write(maps__lock(maps)); +} + +/* + * Merges map into maps by splitting the new map within the existing map + * regions. + */ +int maps__merge_in(struct maps *kmaps, struct map *new_map) +{ + struct map_rb_node *rb_node; + struct rb_node *first; + bool overlaps; + LIST_HEAD(merged); + int err = 0; + + down_read(maps__lock(kmaps)); + first = first_ending_after(kmaps, new_map); + rb_node = first ? rb_entry(first, struct map_rb_node, rb_node) : NULL; + overlaps = rb_node && map__start(rb_node->map) < map__end(new_map); + up_read(maps__lock(kmaps)); + + if (!overlaps) + return maps__insert(kmaps, new_map); + + maps__for_each_entry(kmaps, rb_node) { + struct map *old_map = rb_node->map; + + /* no overload with this one */ + if (map__end(new_map) < map__start(old_map) || + map__start(new_map) >= map__end(old_map)) + continue; + + if (map__start(new_map) < map__start(old_map)) { + /* + * |new...... + * |old.... 
+ */ + if (map__end(new_map) < map__end(old_map)) { + /* + * |new......| -> |new..| + * |old....| -> |old....| + */ + map__set_end(new_map, map__start(old_map)); + } else { + /* + * |new.............| -> |new..| |new..| + * |old....| -> |old....| + */ + struct map_list_node *m = map_list_node__new(); + + if (!m) { + err = -ENOMEM; + goto out; + } + + m->map = map__clone(new_map); + if (!m->map) { + free(m); + err = -ENOMEM; + goto out; + } + + map__set_end(m->map, map__start(old_map)); + list_add_tail(&m->node, &merged); + map__add_pgoff(new_map, map__end(old_map) - map__start(new_map)); + map__set_start(new_map, map__end(old_map)); + } + } else { + /* + * |new...... + * |old.... + */ + if (map__end(new_map) < map__end(old_map)) { + /* + * |new..| -> x + * |old.........| -> |old.........| + */ + map__put(new_map); + new_map = NULL; + break; + } else { + /* + * |new......| -> |new...| + * |old....| -> |old....| + */ + map__add_pgoff(new_map, map__end(old_map) - map__start(new_map)); + map__set_start(new_map, map__end(old_map)); + } + } + } + +out: + while (!list_empty(&merged)) { + struct map_list_node *old_node; + + old_node = list_entry(merged.next, struct map_list_node, node); + list_del_init(&old_node->node); + if (!err) + err = maps__insert(kmaps, old_node->map); + map__put(old_node->map); + free(old_node); + } + + if (new_map) { + if (!err) + err = maps__insert(kmaps, new_map); + map__put(new_map); + } + return err; +} + +void maps__load_first(struct maps *maps) +{ + struct map_rb_node *first; + + down_read(maps__lock(maps)); + + first = maps__first(maps); + if (first) + map__load(first->map); + + up_read(maps__lock(maps)); } diff --git a/tools/perf/util/maps.h b/tools/perf/util/maps.h index 83144e0645ed46598c7f0500607ffc4e58c2cf71..d836d04c940229a70a30561ade8dc40edb02b4f0 100644 --- a/tools/perf/util/maps.h +++ b/tools/perf/util/maps.h @@ -14,24 +14,18 @@ struct ref_reloc_sym; struct machine; struct map; struct maps; -struct thread; -struct map_rb_node { - 
struct rb_node rb_node; +struct map_list_node { + struct list_head node; struct map *map; }; -struct map_rb_node *maps__first(struct maps *maps); -struct map_rb_node *map_rb_node__next(struct map_rb_node *node); -struct map_rb_node *maps__find_node(struct maps *maps, struct map *map); -struct map *maps__find(struct maps *maps, u64 addr); - -#define maps__for_each_entry(maps, map) \ - for (map = maps__first(maps); map; map = map_rb_node__next(map)) +static inline struct map_list_node *map_list_node__new(void) +{ + return malloc(sizeof(struct map_list_node)); +} -#define maps__for_each_entry_safe(maps, map, next) \ - for (map = maps__first(maps), next = map_rb_node__next(map); map; \ - map = next, next = map_rb_node__next(map)) +struct map *maps__find(struct maps *maps, u64 addr); DECLARE_RC_STRUCT(maps) { struct rb_root entries; @@ -58,7 +52,7 @@ struct kmap { struct maps *maps__new(struct machine *machine); bool maps__empty(struct maps *maps); -int maps__clone(struct thread *thread, struct maps *parent); +int maps__copy_from(struct maps *maps, struct maps *parent); struct maps *maps__get(struct maps *maps); void maps__put(struct maps *maps); @@ -71,26 +65,16 @@ static inline void __maps__zput(struct maps **map) #define maps__zput(map) __maps__zput(&map) -static inline struct rb_root *maps__entries(struct maps *maps) -{ - return &RC_CHK_ACCESS(maps)->entries; -} +/* Iterate over map calling cb for each entry. */ +int maps__for_each_map(struct maps *maps, int (*cb)(struct map *map, void *data), void *data); +/* Iterate over map removing an entry if cb returns true. 
*/ +void maps__remove_maps(struct maps *maps, bool (*cb)(struct map *map, void *data), void *data); static inline struct machine *maps__machine(struct maps *maps) { return RC_CHK_ACCESS(maps)->machine; } -static inline struct rw_semaphore *maps__lock(struct maps *maps) -{ - return &RC_CHK_ACCESS(maps)->lock; -} - -static inline struct map **maps__maps_by_name(struct maps *maps) -{ - return RC_CHK_ACCESS(maps)->maps_by_name; -} - static inline unsigned int maps__nr_maps(const struct maps *maps) { return RC_CHK_ACCESS(maps)->nr_maps; @@ -125,12 +109,18 @@ struct addr_map_symbol; int maps__find_ams(struct maps *maps, struct addr_map_symbol *ams); -int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp); +int maps__fixup_overlap_and_insert(struct maps *maps, struct map *new); struct map *maps__find_by_name(struct maps *maps, const char *name); +struct map *maps__find_next_entry(struct maps *maps, struct map *map); + int maps__merge_in(struct maps *kmaps, struct map *new_map); void __maps__sort_by_name(struct maps *maps); +void maps__fixup_end(struct maps *maps); + +void maps__load_first(struct maps *maps); + #endif // __PERF_MAPS_H diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index 954b235e12e51700f43e3901636b5a24e5317b42..3a2e3687878c1862c64d0f723496a76ceb2f8229 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -100,11 +100,14 @@ int perf_mem_events__parse(const char *str) return -1; } -static bool perf_mem_event__supported(const char *mnt, char *sysfs_name) +static bool perf_mem_event__supported(const char *mnt, struct perf_pmu *pmu, + struct perf_mem_event *e) { + char sysfs_name[100]; char path[PATH_MAX]; struct stat st; + scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name, pmu->name); scnprintf(path, PATH_MAX, "%s/devices/%s", mnt, sysfs_name); return !stat(path, &st); } @@ -120,7 +123,6 @@ int perf_mem_events__init(void) for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { struct 
perf_mem_event *e = perf_mem_events__ptr(j); - char sysfs_name[100]; struct perf_pmu *pmu = NULL; /* @@ -136,12 +138,12 @@ int perf_mem_events__init(void) * of core PMU. */ while ((pmu = perf_pmus__scan(pmu)) != NULL) { - scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name, pmu->name); - e->supported |= perf_mem_event__supported(mnt, sysfs_name); + e->supported |= perf_mem_event__supported(mnt, pmu, e); + if (e->supported) { + found = true; + break; + } } - - if (e->supported) - found = true; } return found ? 0 : -ENOENT; @@ -167,13 +169,10 @@ static void perf_mem_events__print_unsupport_hybrid(struct perf_mem_event *e, int idx) { const char *mnt = sysfs__mount(); - char sysfs_name[100]; struct perf_pmu *pmu = NULL; while ((pmu = perf_pmus__scan(pmu)) != NULL) { - scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name, - pmu->name); - if (!perf_mem_event__supported(mnt, sysfs_name)) { + if (!perf_mem_event__supported(mnt, pmu, e)) { pr_err("failed: event '%s' not supported\n", perf_mem_events__name(idx, pmu->name)); } @@ -183,6 +182,7 @@ static void perf_mem_events__print_unsupport_hybrid(struct perf_mem_event *e, int perf_mem_events__record_args(const char **rec_argv, int *argv_nr, char **rec_tmp, int *tmp_nr) { + const char *mnt = sysfs__mount(); int i = *argv_nr, k = 0; struct perf_mem_event *e; @@ -211,6 +211,9 @@ int perf_mem_events__record_args(const char **rec_argv, int *argv_nr, while ((pmu = perf_pmus__scan(pmu)) != NULL) { const char *s = perf_mem_events__name(j, pmu->name); + if (!perf_mem_event__supported(mnt, pmu, e)) + continue; + rec_argv[i++] = "-e"; if (s) { char *copy = strdup(s); diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 49093b21ee2da034e6634ab7dbf34dd08045faeb..122ee198a86e9961d5b6c63dffc4c0d1e1636127 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -295,15 +295,14 @@ int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, struct perf_cpu map->core.flush = mp->flush; - map->comp_level = 
mp->comp_level; #ifndef PYTHON_PERF - if (zstd_init(&map->zstd_data, map->comp_level)) { + if (zstd_init(&map->zstd_data, mp->comp_level)) { pr_debug2("failed to init mmap compressor, error %d\n", errno); return -1; } #endif - if (map->comp_level && !perf_mmap__aio_enabled(map)) { + if (mp->comp_level && !perf_mmap__aio_enabled(map)) { map->data = mmap(NULL, mmap__mmap_len(map), PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); if (map->data == MAP_FAILED) { diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index f944c3cd5efa0b04c8e870b834c40f4fdcaa4f1d..0df6e1621c7e8fcf5857d3f3ce5709739fe53064 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -39,7 +39,6 @@ struct mmap { #endif struct mmap_cpu_mask affinity_mask; void *data; - int comp_level; struct perf_data_file *file; struct zstd_data zstd_data; }; diff --git a/tools/perf/util/parse-branch-options.c b/tools/perf/util/parse-branch-options.c index fd67d204d720d9ba7859fa25e1b96a5b1ebf9c75..f7f7aff3d85a049000828a9fcb9ecc3ad9026389 100644 --- a/tools/perf/util/parse-branch-options.c +++ b/tools/perf/util/parse-branch-options.c @@ -36,6 +36,7 @@ static const struct branch_mode branch_modes[] = { BRANCH_OPT("stack", PERF_SAMPLE_BRANCH_CALL_STACK), BRANCH_OPT("hw_index", PERF_SAMPLE_BRANCH_HW_INDEX), BRANCH_OPT("priv", PERF_SAMPLE_BRANCH_PRIV_SAVE), + BRANCH_OPT("counter", PERF_SAMPLE_BRANCH_COUNTERS), BRANCH_END }; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index aa2f5c6fc7fc24f205b9c88012dfd8016fdf9b3e..66eabcea424274580abe69fd5226b16931a67dec 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -976,7 +976,7 @@ static int config_term_pmu(struct perf_event_attr *attr, struct parse_events_error *err) { if (term->type_term == PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE) { - const struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type); + struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type); if (!pmu) { char *err_str; 
@@ -986,15 +986,23 @@ static int config_term_pmu(struct perf_event_attr *attr, err_str, /*help=*/NULL); return -EINVAL; } - if (perf_pmu__supports_legacy_cache(pmu)) { + /* + * Rewrite the PMU event to a legacy cache one unless the PMU + * doesn't support legacy cache events or the event is present + * within the PMU. + */ + if (perf_pmu__supports_legacy_cache(pmu) && + !perf_pmu__have_event(pmu, term->config)) { attr->type = PERF_TYPE_HW_CACHE; return parse_events__decode_legacy_cache(term->config, pmu->type, &attr->config); - } else + } else { term->type_term = PARSE_EVENTS__TERM_TYPE_USER; + term->no_value = true; + } } if (term->type_term == PARSE_EVENTS__TERM_TYPE_HARDWARE) { - const struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type); + struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type); if (!pmu) { char *err_str; @@ -1004,10 +1012,19 @@ static int config_term_pmu(struct perf_event_attr *attr, err_str, /*help=*/NULL); return -EINVAL; } - attr->type = PERF_TYPE_HARDWARE; - attr->config = term->val.num; - if (perf_pmus__supports_extended_type()) - attr->config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT; + /* + * If the PMU has a sysfs or json event prefer it over + * legacy. ARM requires this. + */ + if (perf_pmu__have_event(pmu, term->config)) { + term->type_term = PARSE_EVENTS__TERM_TYPE_USER; + term->no_value = true; + } else { + attr->type = PERF_TYPE_HARDWARE; + attr->config = term->val.num; + if (perf_pmus__supports_extended_type()) + attr->config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT; + } return 0; } if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER || @@ -1381,6 +1398,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, YYLTYPE *loc = loc_; LIST_HEAD(config_terms); struct parse_events_terms parsed_terms; + bool alias_rewrote_terms = false; pmu = parse_state->fake_pmu ?: perf_pmus__find(name); @@ -1433,7 +1451,15 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, return evsel ? 
0 : -ENOMEM; } - if (!parse_state->fake_pmu && perf_pmu__check_alias(pmu, &parsed_terms, &info, err)) { + /* Configure attr/terms with a known PMU, this will set hardcoded terms. */ + if (config_attr(&attr, &parsed_terms, parse_state->error, config_term_pmu)) { + parse_events_terms__exit(&parsed_terms); + return -EINVAL; + } + + /* Look for event names in the terms and rewrite into format based terms. */ + if (!parse_state->fake_pmu && perf_pmu__check_alias(pmu, &parsed_terms, + &info, &alias_rewrote_terms, err)) { parse_events_terms__exit(&parsed_terms); return -EINVAL; } @@ -1447,11 +1473,9 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, strbuf_release(&sb); } - /* - * Configure hardcoded terms first, no need to check - * return value when called with fail == 0 ;) - */ - if (config_attr(&attr, &parsed_terms, parse_state->error, config_term_pmu)) { + /* Configure attr/terms again if an alias was expanded. */ + if (alias_rewrote_terms && + config_attr(&attr, &parsed_terms, parse_state->error, config_term_pmu)) { parse_events_terms__exit(&parsed_terms); return -EINVAL; } diff --git a/tools/perf/util/perf_api_probe.c b/tools/perf/util/perf_api_probe.c index e1e2d701599c4294f05e1c73ca5aef4cd1ef8544..1de3b69cdf4aafb7fdc73574b2b44a3cad38a75e 100644 --- a/tools/perf/util/perf_api_probe.c +++ b/tools/perf/util/perf_api_probe.c @@ -64,7 +64,7 @@ static bool perf_probe_api(setup_probe_fn_t fn) struct perf_cpu cpu; int ret, i = 0; - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); if (!cpus) return false; cpu = perf_cpu_map__cpu(cpus, 0); @@ -140,7 +140,7 @@ bool perf_can_record_cpu_wide(void) struct perf_cpu cpu; int fd; - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); if (!cpus) return false; diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c index 2247991451f3aa1ba0969b9ad4f1f22e595b2a21..8f04d3b7f3ec783bee9981fa096b145e80fabc91 100644 --- 
a/tools/perf/util/perf_event_attr_fprintf.c +++ b/tools/perf/util/perf_event_attr_fprintf.c @@ -55,6 +55,7 @@ static void __p_branch_sample_type(char *buf, size_t size, u64 value) bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP), bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES), bit_name(TYPE_SAVE), bit_name(HW_INDEX), bit_name(PRIV_SAVE), + bit_name(COUNTERS), { .name = NULL, } }; #undef bit_name diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index d3c9aa4326bee4ba3d3b6594349d4e0426c2aae3..3c9609944a2f312e7cac681f8a19dd037d6eb01e 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1494,12 +1494,14 @@ static int check_info_data(struct perf_pmu *pmu, * defined for the alias */ int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_terms, - struct perf_pmu_info *info, struct parse_events_error *err) + struct perf_pmu_info *info, bool *rewrote_terms, + struct parse_events_error *err) { struct parse_events_term *term, *h; struct perf_pmu_alias *alias; int ret; + *rewrote_terms = false; info->per_pkg = false; /* @@ -1521,7 +1523,7 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_ NULL); return ret; } - + *rewrote_terms = true; ret = check_info_data(pmu, alias, info, err, term->err_term); if (ret) return ret; @@ -1615,6 +1617,8 @@ bool perf_pmu__auto_merge_stats(const struct perf_pmu *pmu) bool perf_pmu__have_event(struct perf_pmu *pmu, const char *name) { + if (!name) + return false; if (perf_pmu__find_alias(pmu, name, /*load=*/ true) != NULL) return true; if (pmu->cpu_aliases_added || !pmu->events_table) diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index d2895d415f08fbf941bfd1bfa52f371307228e09..424c3fee09496248d6168ba5361d4fa9f66e28a2 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -201,7 +201,8 @@ int perf_pmu__config_terms(const struct perf_pmu *pmu, __u64 perf_pmu__format_bits(struct perf_pmu *pmu, const char *name); int 
perf_pmu__format_type(struct perf_pmu *pmu, const char *name); int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_terms, - struct perf_pmu_info *info, struct parse_events_error *err); + struct perf_pmu_info *info, bool *rewrote_terms, + struct parse_events_error *err); int perf_pmu__find_event(struct perf_pmu *pmu, const char *event, void *state, pmu_event_callback cb); int perf_pmu__format_parse(struct perf_pmu *pmu, int dirfd, bool eager_load); diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 1a5b7fa459b232043457f706b45c0b7a87d35643..a1a796043691f487fe901e9fafef5888913f4ec7 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -149,10 +149,32 @@ static int kernel_get_symbol_address_by_name(const char *name, u64 *addr, return 0; } +struct kernel_get_module_map_cb_args { + const char *module; + struct map *result; +}; + +static int kernel_get_module_map_cb(struct map *map, void *data) +{ + struct kernel_get_module_map_cb_args *args = data; + struct dso *dso = map__dso(map); + const char *short_name = dso->short_name; /* short_name is "[module]" */ + u16 short_name_len = dso->short_name_len; + + if (strncmp(short_name + 1, args->module, short_name_len - 2) == 0 && + args->module[short_name_len - 2] == '\0') { + args->result = map__get(map); + return 1; + } + return 0; +} + static struct map *kernel_get_module_map(const char *module) { - struct maps *maps = machine__kernel_maps(host_machine); - struct map_rb_node *pos; + struct kernel_get_module_map_cb_args args = { + .module = module, + .result = NULL, + }; /* A file path -- this is an offline module */ if (module && strchr(module, '/')) @@ -164,19 +186,9 @@ static struct map *kernel_get_module_map(const char *module) return map__get(map); } - maps__for_each_entry(maps, pos) { - /* short_name is "[module]" */ - struct dso *dso = map__dso(pos->map); - const char *short_name = dso->short_name; - u16 short_name_len = 
dso->short_name_len; + maps__for_each_map(machine__kernel_maps(host_machine), kernel_get_module_map_cb, &args); - if (strncmp(short_name + 1, module, - short_name_len - 2) == 0 && - module[short_name_len - 2] == '\0') { - return map__get(pos->map); - } - } - return NULL; + return args.result; } struct map *get_target_map(const char *target, struct nsinfo *nsi, bool user) diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index f171360b0ef4db06eb0ff760619996cf58339943..c8923375e30d6618fda564b84c41317c46009a3d 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -23,6 +23,7 @@ #include "event.h" #include "dso.h" #include "debug.h" +#include "debuginfo.h" #include "intlist.h" #include "strbuf.h" #include "strlist.h" @@ -31,128 +32,9 @@ #include "probe-file.h" #include "string2.h" -#ifdef HAVE_DEBUGINFOD_SUPPORT -#include -#endif - /* Kprobe tracer basic type is up to u64 */ #define MAX_BASIC_TYPE_BITS 64 -/* Dwarf FL wrappers */ -static char *debuginfo_path; /* Currently dummy */ - -static const Dwfl_Callbacks offline_callbacks = { - .find_debuginfo = dwfl_standard_find_debuginfo, - .debuginfo_path = &debuginfo_path, - - .section_address = dwfl_offline_section_address, - - /* We use this table for core files too. 
*/ - .find_elf = dwfl_build_id_find_elf, -}; - -/* Get a Dwarf from offline image */ -static int debuginfo__init_offline_dwarf(struct debuginfo *dbg, - const char *path) -{ - GElf_Addr dummy; - int fd; - - fd = open(path, O_RDONLY); - if (fd < 0) - return fd; - - dbg->dwfl = dwfl_begin(&offline_callbacks); - if (!dbg->dwfl) - goto error; - - dwfl_report_begin(dbg->dwfl); - dbg->mod = dwfl_report_offline(dbg->dwfl, "", "", fd); - if (!dbg->mod) - goto error; - - dbg->dbg = dwfl_module_getdwarf(dbg->mod, &dbg->bias); - if (!dbg->dbg) - goto error; - - dwfl_module_build_id(dbg->mod, &dbg->build_id, &dummy); - - dwfl_report_end(dbg->dwfl, NULL, NULL); - - return 0; -error: - if (dbg->dwfl) - dwfl_end(dbg->dwfl); - else - close(fd); - memset(dbg, 0, sizeof(*dbg)); - - return -ENOENT; -} - -static struct debuginfo *__debuginfo__new(const char *path) -{ - struct debuginfo *dbg = zalloc(sizeof(*dbg)); - if (!dbg) - return NULL; - - if (debuginfo__init_offline_dwarf(dbg, path) < 0) - zfree(&dbg); - if (dbg) - pr_debug("Open Debuginfo file: %s\n", path); - return dbg; -} - -enum dso_binary_type distro_dwarf_types[] = { - DSO_BINARY_TYPE__FEDORA_DEBUGINFO, - DSO_BINARY_TYPE__UBUNTU_DEBUGINFO, - DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO, - DSO_BINARY_TYPE__BUILDID_DEBUGINFO, - DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO, - DSO_BINARY_TYPE__NOT_FOUND, -}; - -struct debuginfo *debuginfo__new(const char *path) -{ - enum dso_binary_type *type; - char buf[PATH_MAX], nil = '\0'; - struct dso *dso; - struct debuginfo *dinfo = NULL; - struct build_id bid; - - /* Try to open distro debuginfo files */ - dso = dso__new(path); - if (!dso) - goto out; - - /* Set the build id for DSO_BINARY_TYPE__BUILDID_DEBUGINFO */ - if (is_regular_file(path) && filename__read_build_id(path, &bid) > 0) - dso__set_build_id(dso, &bid); - - for (type = distro_dwarf_types; - !dinfo && *type != DSO_BINARY_TYPE__NOT_FOUND; - type++) { - if (dso__read_binary_type_filename(dso, *type, &nil, - buf, PATH_MAX) < 0) - 
continue; - dinfo = __debuginfo__new(buf); - } - dso__put(dso); - -out: - /* if failed to open all distro debuginfo, open given binary */ - return dinfo ? : __debuginfo__new(path); -} - -void debuginfo__delete(struct debuginfo *dbg) -{ - if (dbg) { - if (dbg->dwfl) - dwfl_end(dbg->dwfl); - free(dbg); - } -} - /* * Probe finder related functions */ @@ -722,7 +604,7 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf) ret = dwarf_getlocation_addr(&fb_attr, pf->addr, &pf->fb_ops, &nops, 1); if (ret <= 0 || nops == 0) { pf->fb_ops = NULL; -#if _ELFUTILS_PREREQ(0, 142) +#ifdef HAVE_DWARF_CFI_SUPPORT } else if (nops == 1 && pf->fb_ops[0].atom == DW_OP_call_frame_cfa && (pf->cfi_eh != NULL || pf->cfi_dbg != NULL)) { if ((dwarf_cfi_addrframe(pf->cfi_eh, pf->addr, &frame) != 0 && @@ -733,7 +615,7 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf) free(frame); return -ENOENT; } -#endif +#endif /* HAVE_DWARF_CFI_SUPPORT */ } /* Call finder's callback handler */ @@ -1258,7 +1140,7 @@ static int debuginfo__find_probes(struct debuginfo *dbg, pf->machine = ehdr.e_machine; -#if _ELFUTILS_PREREQ(0, 142) +#ifdef HAVE_DWARF_CFI_SUPPORT do { GElf_Shdr shdr; @@ -1268,7 +1150,7 @@ static int debuginfo__find_probes(struct debuginfo *dbg, pf->cfi_dbg = dwarf_getcfi(dbg->dbg); } while (0); -#endif +#endif /* HAVE_DWARF_CFI_SUPPORT */ ret = debuginfo__find_probe_location(dbg, pf); return ret; @@ -1677,44 +1559,6 @@ int debuginfo__find_available_vars_at(struct debuginfo *dbg, return (ret < 0) ? 
ret : af.nvls; } -/* For the kernel module, we need a special code to get a DIE */ -int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs, - bool adjust_offset) -{ - int n, i; - Elf32_Word shndx; - Elf_Scn *scn; - Elf *elf; - GElf_Shdr mem, *shdr; - const char *p; - - elf = dwfl_module_getelf(dbg->mod, &dbg->bias); - if (!elf) - return -EINVAL; - - /* Get the number of relocations */ - n = dwfl_module_relocations(dbg->mod); - if (n < 0) - return -ENOENT; - /* Search the relocation related .text section */ - for (i = 0; i < n; i++) { - p = dwfl_module_relocation_info(dbg->mod, i, &shndx); - if (strcmp(p, ".text") == 0) { - /* OK, get the section header */ - scn = elf_getscn(elf, shndx); - if (!scn) - return -ENOENT; - shdr = gelf_getshdr(scn, &mem); - if (!shdr) - return -ENOENT; - *offs = shdr->sh_addr; - if (adjust_offset) - *offs -= shdr->sh_offset; - } - } - return 0; -} - /* Reverse search */ int debuginfo__find_probe_point(struct debuginfo *dbg, u64 addr, struct perf_probe_point *ppt) @@ -2009,41 +1853,6 @@ found: return (ret < 0) ? 
ret : lf.found; } -#ifdef HAVE_DEBUGINFOD_SUPPORT -/* debuginfod doesn't require the comp_dir but buildid is required */ -static int get_source_from_debuginfod(const char *raw_path, - const char *sbuild_id, char **new_path) -{ - debuginfod_client *c = debuginfod_begin(); - const char *p = raw_path; - int fd; - - if (!c) - return -ENOMEM; - - fd = debuginfod_find_source(c, (const unsigned char *)sbuild_id, - 0, p, new_path); - pr_debug("Search %s from debuginfod -> %d\n", p, fd); - if (fd >= 0) - close(fd); - debuginfod_end(c); - if (fd < 0) { - pr_debug("Failed to find %s in debuginfod (%s)\n", - raw_path, sbuild_id); - return -ENOENT; - } - pr_debug("Got a source %s\n", *new_path); - - return 0; -} -#else -static inline int get_source_from_debuginfod(const char *raw_path __maybe_unused, - const char *sbuild_id __maybe_unused, - char **new_path __maybe_unused) -{ - return -ENOTSUP; -} -#endif /* * Find a src file from a DWARF tag path. Prepend optional source path prefix * and chop off leading directories that do not exist. 
Result is passed back as diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index 8bc1c80d3c1c0b616659a10183d9f463c1008ffe..3add5ff516e12de544b38cf5e48a45f922b0dee2 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -24,21 +24,7 @@ static inline int is_c_varname(const char *name) #ifdef HAVE_DWARF_SUPPORT #include "dwarf-aux.h" - -/* TODO: export debuginfo data structure even if no dwarf support */ - -/* debug information structure */ -struct debuginfo { - Dwarf *dbg; - Dwfl_Module *mod; - Dwfl *dwfl; - Dwarf_Addr bias; - const unsigned char *build_id; -}; - -/* This also tries to open distro debuginfo */ -struct debuginfo *debuginfo__new(const char *path); -void debuginfo__delete(struct debuginfo *dbg); +#include "debuginfo.h" /* Find probe_trace_events specified by perf_probe_event from debuginfo */ int debuginfo__find_trace_events(struct debuginfo *dbg, @@ -49,9 +35,6 @@ int debuginfo__find_trace_events(struct debuginfo *dbg, int debuginfo__find_probe_point(struct debuginfo *dbg, u64 addr, struct perf_probe_point *ppt); -int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs, - bool adjust_offset); - /* Find a line range */ int debuginfo__find_line_range(struct debuginfo *dbg, struct line_range *lr); diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index 9eb5c6a08999e83bb1ef05117ba8ce926d93dc16..87e817b3cf7e9d9b94799cfcb47cbd98b4c1ff02 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -237,8 +237,8 @@ bool evlist__can_select_event(struct evlist *evlist, const char *str) evsel = evlist__last(temp_evlist); - if (!evlist || perf_cpu_map__empty(evlist->core.user_requested_cpus)) { - struct perf_cpu_map *cpus = perf_cpu_map__new(NULL); + if (!evlist || perf_cpu_map__has_any_cpu_or_is_empty(evlist->core.user_requested_cpus)) { + struct perf_cpu_map *cpus = perf_cpu_map__new_online_cpus(); if (cpus) cpu = perf_cpu_map__cpu(cpus, 0); diff --git 
a/tools/perf/util/s390-cpumcf-kernel.h b/tools/perf/util/s390-cpumcf-kernel.h index f55ca07f3ca12d912fcc4c12bb02d925c3eafe71..74b36644e384990a252e4623b1cf5e1ab70f789e 100644 --- a/tools/perf/util/s390-cpumcf-kernel.h +++ b/tools/perf/util/s390-cpumcf-kernel.h @@ -12,6 +12,8 @@ #define S390_CPUMCF_DIAG_DEF 0xfeef /* Counter diagnostic entry ID */ #define PERF_EVENT_CPUM_CF_DIAG 0xBC000 /* Event: Counter sets */ #define PERF_EVENT_CPUM_SF_DIAG 0xBD000 /* Event: Combined-sampling */ +#define PERF_EVENT_PAI_CRYPTO_ALL 0x1000 /* Event: CRYPTO_ALL */ +#define PERF_EVENT_PAI_NNPA_ALL 0x1800 /* Event: NNPA_ALL */ struct cf_ctrset_entry { /* CPU-M CF counter set entry (8 byte) */ unsigned int def:16; /* 0-15 Data Entry Format */ diff --git a/tools/perf/util/s390-sample-raw.c b/tools/perf/util/s390-sample-raw.c index 115b16edb45138cb1d460afa2fbcc696ebee60e9..53383e97ec9d5731276fcce0021ec6a5176196c5 100644 --- a/tools/perf/util/s390-sample-raw.c +++ b/tools/perf/util/s390-sample-raw.c @@ -51,8 +51,6 @@ static bool s390_cpumcfdg_testctr(struct perf_sample *sample) struct cf_trailer_entry *te; struct cf_ctrset_entry *cep, ce; - if (!len) - return false; while (offset < len) { cep = (struct cf_ctrset_entry *)(buf + offset); ce.def = be16_to_cpu(cep->def); @@ -125,6 +123,9 @@ static int get_counterset_start(int setnr) return 128; case CPUMF_CTR_SET_MT_DIAG: /* Diagnostic counter set */ return 448; + case PERF_EVENT_PAI_NNPA_ALL: /* PAI NNPA counter set */ + case PERF_EVENT_PAI_CRYPTO_ALL: /* PAI CRYPTO counter set */ + return setnr; default: return -1; } @@ -212,27 +213,120 @@ static void s390_cpumcfdg_dump(struct perf_pmu *pmu, struct perf_sample *sample) } } +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpacked" +#pragma GCC diagnostic ignored "-Wattributes" +/* + * Check for consistency of PAI_CRYPTO/PAI_NNPA raw data. 
+ */ +struct pai_data { /* Event number and value */ + u16 event_nr; + u64 event_val; +} __packed; + +#pragma GCC diagnostic pop + +/* + * Test for valid raw data. At least one PAI event should be in the raw + * data section. + */ +static bool s390_pai_all_test(struct perf_sample *sample) +{ + size_t len = sample->raw_size; + + if (len < 0xa) + return false; + return true; +} + +static void s390_pai_all_dump(struct evsel *evsel, struct perf_sample *sample) +{ + size_t len = sample->raw_size, offset = 0; + unsigned char *p = sample->raw_data; + const char *color = PERF_COLOR_BLUE; + struct pai_data pai_data; + char *ev_name; + + while (offset < len) { + memcpy(&pai_data.event_nr, p, sizeof(pai_data.event_nr)); + pai_data.event_nr = be16_to_cpu(pai_data.event_nr); + p += sizeof(pai_data.event_nr); + offset += sizeof(pai_data.event_nr); + + memcpy(&pai_data.event_val, p, sizeof(pai_data.event_val)); + pai_data.event_val = be64_to_cpu(pai_data.event_val); + p += sizeof(pai_data.event_val); + offset += sizeof(pai_data.event_val); + + ev_name = get_counter_name(evsel->core.attr.config, + pai_data.event_nr, evsel->pmu); + color_fprintf(stdout, color, "\tCounter:%03d %s Value:%#018lx\n", + pai_data.event_nr, ev_name ?: "", + pai_data.event_val); + free(ev_name); + + if (offset + 0xa > len) + break; + } + color_fprintf(stdout, color, "\n"); +} + /* S390 specific trace event function. Check for PERF_RECORD_SAMPLE events - * and if the event was triggered by a counter set diagnostic event display - * its raw data. + * and if the event was triggered by a + * - counter set diagnostic event + * - processor activity assist (PAI) crypto counter event + * - processor activity assist (PAI) neural network processor assist (NNPA) + * counter event + * display its raw data. * The function is only invoked when the dump flag -D is set. + * + * Function evlist__s390_sample_raw() is defined as call back after it has + * been verified that the perf.data file was created on s390 platform. 
*/ -void evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event, struct perf_sample *sample) +void evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event, + struct perf_sample *sample) { + const char *pai_name; struct evsel *evsel; if (event->header.type != PERF_RECORD_SAMPLE) return; evsel = evlist__event2evsel(evlist, event); - if (evsel == NULL || - evsel->core.attr.config != PERF_EVENT_CPUM_CF_DIAG) + if (!evsel) + return; + + /* Check for raw data in sample */ + if (!sample->raw_size || !sample->raw_data) return; /* Display raw data on screen */ - if (!s390_cpumcfdg_testctr(sample)) { - pr_err("Invalid counter set data encountered\n"); + if (evsel->core.attr.config == PERF_EVENT_CPUM_CF_DIAG) { + if (!evsel->pmu) + evsel->pmu = perf_pmus__find("cpum_cf"); + if (!s390_cpumcfdg_testctr(sample)) + pr_err("Invalid counter set data encountered\n"); + else + s390_cpumcfdg_dump(evsel->pmu, sample); + return; + } + + switch (evsel->core.attr.config) { + case PERF_EVENT_PAI_NNPA_ALL: + pai_name = "NNPA_ALL"; + break; + case PERF_EVENT_PAI_CRYPTO_ALL: + pai_name = "CRYPTO_ALL"; + break; + default: return; } - s390_cpumcfdg_dump(evsel->pmu, sample); + + if (!s390_pai_all_test(sample)) { + pr_err("Invalid %s raw data encountered\n", pai_name); + } else { + if (!evsel->pmu) + evsel->pmu = perf_pmus__find_by_type(evsel->core.attr.type); + s390_pai_all_dump(evsel, sample); + } } diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h index c92ad0f51ecd97d5727474b0a6b73e24ef37c41e..70b2c3135555ec2689fb5e824293195103c41590 100644 --- a/tools/perf/util/sample.h +++ b/tools/perf/util/sample.h @@ -113,6 +113,7 @@ struct perf_sample { void *raw_data; struct ip_callchain *callchain; struct branch_stack *branch_stack; + u64 *branch_stack_cntr; struct regs_dump user_regs; struct regs_dump intr_regs; struct stack_dump user_stack; diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c 
b/tools/perf/util/scripting-engines/trace-event-perl.c index 603091317bed9be476117251fcda7caacaad5f54..b072ac5d3bc228ec628f054e86b94d422ee4cf76 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -490,6 +490,9 @@ static int perl_start_script(const char *script, int argc, const char **argv, scripting_context->session = session; command_line = malloc((argc + 2) * sizeof(const char *)); + if (!command_line) + return -ENOMEM; + command_line[0] = ""; command_line[1] = script; for (i = 2; i < argc + 2; i++) diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 94312741443abf8d858c6cd9d71e415d74042b5c..860e1837ba9693eb437a9ae68d762f5a89cf1d2d 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -353,6 +353,8 @@ static PyObject *get_field_numeric_entry(struct tep_event *event, if (is_array) { list = PyList_New(field->arraylen); + if (!list) + Py_FatalError("couldn't create Python list"); item_size = field->size / field->arraylen; n_items = field->arraylen; } else { @@ -754,7 +756,7 @@ static void regs_map(struct regs_dump *regs, uint64_t mask, const char *arch, ch } } -static void set_regs_in_dict(PyObject *dict, +static int set_regs_in_dict(PyObject *dict, struct perf_sample *sample, struct evsel *evsel) { @@ -770,6 +772,8 @@ static void set_regs_in_dict(PyObject *dict, */ int size = __sw_hweight64(attr->sample_regs_intr) * 28; char *bf = malloc(size); + if (!bf) + return -1; regs_map(&sample->intr_regs, attr->sample_regs_intr, arch, bf, size); @@ -781,6 +785,8 @@ static void set_regs_in_dict(PyObject *dict, pydict_set_item_string_decref(dict, "uregs", _PyUnicode_FromString(bf)); free(bf); + + return 0; } static void set_sym_in_dict(PyObject *dict, struct addr_location *al, @@ -920,7 +926,8 @@ static PyObject *get_perf_sample_dict(struct 
perf_sample *sample, PyLong_FromUnsignedLongLong(sample->cyc_cnt)); } - set_regs_in_dict(dict, sample, evsel); + if (set_regs_in_dict(dict, sample, evsel)) + Py_FatalError("Failed to setting regs in dict"); return dict; } @@ -1918,12 +1925,18 @@ static int python_start_script(const char *script, int argc, const char **argv, scripting_context->session = session; #if PY_MAJOR_VERSION < 3 command_line = malloc((argc + 1) * sizeof(const char *)); + if (!command_line) + return -1; + command_line[0] = script; for (i = 1; i < argc + 1; i++) command_line[i] = argv[i - 1]; PyImport_AppendInittab(name, initperf_trace_context); #else command_line = malloc((argc + 1) * sizeof(wchar_t *)); + if (!command_line) + return -1; + command_line[0] = Py_DecodeLocale(script, NULL); for (i = 1; i < argc + 1; i++) command_line[i] = Py_DecodeLocale(argv[i - 1], NULL); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 1e9aa8ed15b6445eb906b76738f1c780cebb0713..199d3e8df31581c02245967d795847783556ee4c 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -115,6 +115,11 @@ static int perf_session__open(struct perf_session *session, int repipe_fd) return -1; } + if (perf_header__has_feat(&session->header, HEADER_AUXTRACE)) { + /* Auxiliary events may reference exited threads, hold onto dead ones. 
*/ + symbol_conf.keep_exited_threads = true; + } + if (perf_data__is_pipe(data)) return 0; @@ -1150,9 +1155,13 @@ static void callchain__printf(struct evsel *evsel, i, callchain->ips[i]); } -static void branch_stack__printf(struct perf_sample *sample, bool callstack) +static void branch_stack__printf(struct perf_sample *sample, + struct evsel *evsel) { struct branch_entry *entries = perf_sample__branch_entries(sample); + bool callstack = evsel__has_branch_callstack(evsel); + u64 *branch_stack_cntr = sample->branch_stack_cntr; + struct perf_env *env = evsel__env(evsel); uint64_t i; if (!callstack) { @@ -1194,6 +1203,13 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack) } } } + + if (branch_stack_cntr) { + printf("... branch stack counters: nr:%" PRIu64 " (counter width: %u max counter nr:%u)\n", + sample->branch_stack->nr, env->br_cntr_width, env->br_cntr_nr); + for (i = 0; i < sample->branch_stack->nr; i++) + printf("..... %2"PRIu64": %016" PRIx64 "\n", i, branch_stack_cntr[i]); + } } static void regs_dump__printf(u64 mask, u64 *regs, const char *arch) @@ -1355,7 +1371,7 @@ static void dump_sample(struct evsel *evsel, union perf_event *event, callchain__printf(evsel, sample); if (evsel__has_br_stack(evsel)) - branch_stack__printf(sample, evsel__has_branch_callstack(evsel)); + branch_stack__printf(sample, evsel); if (sample_type & PERF_SAMPLE_REGS_USER) regs_user__printf(sample, arch); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 80e4f613274015deb70028c76dc9b60ea103f6e8..30254eb637099b07427d73944a6bb4e7baffc724 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -24,6 +24,7 @@ #include "strbuf.h" #include "mem-events.h" #include "annotate.h" +#include "annotate-data.h" #include "event.h" #include "time-utils.h" #include "cgroup.h" @@ -418,6 +419,52 @@ struct sort_entry sort_sym = { .se_width_idx = HISTC_SYMBOL, }; +/* --sort symoff */ + +static int64_t +sort__symoff_cmp(struct hist_entry *left, 
struct hist_entry *right) +{ + int64_t ret; + + ret = sort__sym_cmp(left, right); + if (ret) + return ret; + + return left->ip - right->ip; +} + +static int64_t +sort__symoff_sort(struct hist_entry *left, struct hist_entry *right) +{ + int64_t ret; + + ret = sort__sym_sort(left, right); + if (ret) + return ret; + + return left->ip - right->ip; +} + +static int +hist_entry__symoff_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) +{ + struct symbol *sym = he->ms.sym; + + if (sym == NULL) + return repsep_snprintf(bf, size, "[%c] %-#.*llx", he->level, width - 4, he->ip); + + return repsep_snprintf(bf, size, "[%c] %s+0x%llx", he->level, sym->name, he->ip - sym->start); +} + +struct sort_entry sort_sym_offset = { + .se_header = "Symbol Offset", + .se_cmp = sort__symoff_cmp, + .se_sort = sort__symoff_sort, + .se_snprintf = hist_entry__symoff_snprintf, + .se_filter = hist_entry__sym_filter, + .se_width_idx = HISTC_SYMBOL_OFFSET, +}; + /* --sort srcline */ char *hist_entry__srcline(struct hist_entry *he) @@ -583,21 +630,21 @@ static int hist_entry__sym_ipc_snprintf(struct hist_entry *he, char *bf, { struct symbol *sym = he->ms.sym; - struct annotation *notes; + struct annotated_branch *branch; double ipc = 0.0, coverage = 0.0; char tmp[64]; if (!sym) return repsep_snprintf(bf, size, "%-*s", width, "-"); - notes = symbol__annotation(sym); + branch = symbol__annotation(sym)->branch; - if (notes->hit_cycles) - ipc = notes->hit_insn / ((double)notes->hit_cycles); + if (branch && branch->hit_cycles) + ipc = branch->hit_insn / ((double)branch->hit_cycles); - if (notes->total_insn) { - coverage = notes->cover_insn * 100.0 / - ((double)notes->total_insn); + if (branch && branch->total_insn) { + coverage = branch->cover_insn * 100.0 / + ((double)branch->total_insn); } snprintf(tmp, sizeof(tmp), "%-5.2f [%5.1f%%]", ipc, coverage); @@ -2094,7 +2141,7 @@ struct sort_entry sort_dso_size = { .se_width_idx = HISTC_DSO_SIZE, }; -/* --sort dso_size */ +/* --sort 
addr */ static int64_t sort__addr_cmp(struct hist_entry *left, struct hist_entry *right) @@ -2131,6 +2178,152 @@ struct sort_entry sort_addr = { .se_width_idx = HISTC_ADDR, }; +/* --sort type */ + +struct annotated_data_type unknown_type = { + .self = { + .type_name = (char *)"(unknown)", + .children = LIST_HEAD_INIT(unknown_type.self.children), + }, +}; + +static int64_t +sort__type_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return sort__addr_cmp(left, right); +} + +static void sort__type_init(struct hist_entry *he) +{ + if (he->mem_type) + return; + + he->mem_type = hist_entry__get_data_type(he); + if (he->mem_type == NULL) { + he->mem_type = &unknown_type; + he->mem_type_off = 0; + } +} + +static int64_t +sort__type_collapse(struct hist_entry *left, struct hist_entry *right) +{ + struct annotated_data_type *left_type = left->mem_type; + struct annotated_data_type *right_type = right->mem_type; + + if (!left_type) { + sort__type_init(left); + left_type = left->mem_type; + } + + if (!right_type) { + sort__type_init(right); + right_type = right->mem_type; + } + + return strcmp(left_type->self.type_name, right_type->self.type_name); +} + +static int64_t +sort__type_sort(struct hist_entry *left, struct hist_entry *right) +{ + return sort__type_collapse(left, right); +} + +static int hist_entry__type_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + return repsep_snprintf(bf, size, "%-*s", width, he->mem_type->self.type_name); +} + +struct sort_entry sort_type = { + .se_header = "Data Type", + .se_cmp = sort__type_cmp, + .se_collapse = sort__type_collapse, + .se_sort = sort__type_sort, + .se_init = sort__type_init, + .se_snprintf = hist_entry__type_snprintf, + .se_width_idx = HISTC_TYPE, +}; + +/* --sort typeoff */ + +static int64_t +sort__typeoff_sort(struct hist_entry *left, struct hist_entry *right) +{ + struct annotated_data_type *left_type = left->mem_type; + struct annotated_data_type *right_type = 
right->mem_type; + int64_t ret; + + if (!left_type) { + sort__type_init(left); + left_type = left->mem_type; + } + + if (!right_type) { + sort__type_init(right); + right_type = right->mem_type; + } + + ret = strcmp(left_type->self.type_name, right_type->self.type_name); + if (ret) + return ret; + return left->mem_type_off - right->mem_type_off; +} + +static void fill_member_name(char *buf, size_t sz, struct annotated_member *m, + int offset, bool first) +{ + struct annotated_member *child; + + if (list_empty(&m->children)) + return; + + list_for_each_entry(child, &m->children, node) { + if (child->offset <= offset && offset < child->offset + child->size) { + int len = 0; + + /* It can have anonymous struct/union members */ + if (child->var_name) { + len = scnprintf(buf, sz, "%s%s", + first ? "" : ".", child->var_name); + first = false; + } + + fill_member_name(buf + len, sz - len, child, offset, first); + return; + } + } +} + +static int hist_entry__typeoff_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width __maybe_unused) +{ + struct annotated_data_type *he_type = he->mem_type; + char buf[4096]; + + buf[0] = '\0'; + if (list_empty(&he_type->self.children)) + snprintf(buf, sizeof(buf), "no field"); + else + fill_member_name(buf, sizeof(buf), &he_type->self, + he->mem_type_off, true); + buf[4095] = '\0'; + + return repsep_snprintf(bf, size, "%s %+d (%s)", he_type->self.type_name, + he->mem_type_off, buf); +} + +struct sort_entry sort_type_offset = { + .se_header = "Data Type Offset", + .se_cmp = sort__type_cmp, + .se_collapse = sort__typeoff_sort, + .se_sort = sort__typeoff_sort, + .se_init = sort__type_init, + .se_snprintf = hist_entry__typeoff_snprintf, + .se_width_idx = HISTC_TYPE_OFFSET, +}; + struct sort_dimension { const char *name; @@ -2185,7 +2378,10 @@ static struct sort_dimension common_sort_dimensions[] = { DIM(SORT_ADDR, "addr", sort_addr), DIM(SORT_LOCAL_RETIRE_LAT, "local_retire_lat", sort_local_p_stage_cyc), 
DIM(SORT_GLOBAL_RETIRE_LAT, "retire_lat", sort_global_p_stage_cyc), - DIM(SORT_SIMD, "simd", sort_simd) + DIM(SORT_SIMD, "simd", sort_simd), + DIM(SORT_ANNOTATE_DATA_TYPE, "type", sort_type), + DIM(SORT_ANNOTATE_DATA_TYPE_OFFSET, "typeoff", sort_type_offset), + DIM(SORT_SYM_OFFSET, "symoff", sort_sym_offset), }; #undef DIM @@ -3205,6 +3401,8 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok, list->thread = 1; } else if (sd->entry == &sort_comm) { list->comm = 1; + } else if (sd->entry == &sort_type_offset) { + symbol_conf.annotate_data_member = true; } return __sort_dimension__add(sd, list, level); diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index ecfb7f1359d5ee8a16ad02dabf226d467e2937d6..6f6b4189a389780f8aabd0089f48ea28d759a37a 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -15,6 +15,7 @@ struct option; struct thread; +struct annotated_data_type; extern regex_t parent_regex; extern const char *sort_order; @@ -34,6 +35,7 @@ extern struct sort_entry sort_dso_to; extern struct sort_entry sort_sym_from; extern struct sort_entry sort_sym_to; extern struct sort_entry sort_srcline; +extern struct sort_entry sort_type; extern const char default_mem_sort_order[]; extern bool chk_double_cl; @@ -111,6 +113,7 @@ struct hist_entry { u64 p_stage_cyc; u8 cpumode; u8 depth; + int mem_type_off; struct simd_flags simd_flags; /* We are added by hists__add_dummy_entry. 
*/ @@ -154,6 +157,7 @@ struct hist_entry { struct perf_hpp_list *hpp_list; struct hist_entry *parent_he; struct hist_entry_ops *ops; + struct annotated_data_type *mem_type; union { /* this is for hierarchical entry structure */ struct { @@ -243,6 +247,9 @@ enum sort_type { SORT_LOCAL_RETIRE_LAT, SORT_GLOBAL_RETIRE_LAT, SORT_SIMD, + SORT_ANNOTATE_DATA_TYPE, + SORT_ANNOTATE_DATA_TYPE_OFFSET, + SORT_SYM_OFFSET, /* branch stack specific sort keys */ __SORT_BRANCH_STACK, diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index afe6db8e7bf4fb632126086f80adba6909a695cd..8c61f8627ebc9fb37cd645ea87a1d39378009db7 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -898,7 +898,7 @@ static bool hybrid_uniquify(struct evsel *evsel, struct perf_stat_config *config static void uniquify_counter(struct perf_stat_config *config, struct evsel *counter) { - if (config->no_merge || hybrid_uniquify(counter, config)) + if (config->aggr_mode == AGGR_NONE || hybrid_uniquify(counter, config)) uniquify_event_name(counter); } diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 1c5c3eeba4cfb2e4d7b1914ab1e4a4d033562fec..e31426167852ad0d6fe2d94b5f8b84e2a7e7da8a 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -264,7 +264,7 @@ static void print_ll_miss(struct perf_stat_config *config, static const double color_ratios[3] = {20.0, 10.0, 5.0}; print_ratio(config, evsel, aggr_idx, misses, out, STAT_LL_CACHE, color_ratios, - "of all L1-icache accesses"); + "of all LL-cache accesses"); } static void print_dtlb_miss(struct perf_stat_config *config, diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index ec350604221736783df773d40e1ea66bb6983283..b0bcf92f0f9c37e9d74bade174c148ce4c7a8805 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -315,7 +315,7 @@ static int check_per_pkg(struct evsel *counter, struct perf_counts_values *vals, if (!counter->per_pkg) 
return 0; - if (perf_cpu_map__empty(cpus)) + if (perf_cpu_map__has_any_cpu_or_is_empty(cpus)) return 0; if (!mask) { @@ -592,7 +592,7 @@ void perf_stat_merge_counters(struct perf_stat_config *config, struct evlist *ev { struct evsel *evsel; - if (config->no_merge) + if (config->aggr_mode == AGGR_NONE) return; evlist__for_each_entry(evlist, evsel) diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 325d0fad18424f904037a57de2005030bfc1a469..4357ba1148221bf27364ee14abe1184669635d1a 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -76,7 +76,6 @@ struct perf_stat_config { bool null_run; bool ru_display; bool big_num; - bool no_merge; bool hybrid_merge; bool walltime_run_table; bool all_kernel; diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 9e7eeaf616b866894665eef45242beb71d6485c6..4b934ed3bfd13ba2bf1c613e13e743c664b260aa 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1392,8 +1392,7 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, map__set_start(map, shdr->sh_addr + ref_reloc(kmap)); map__set_end(map, map__start(map) + shdr->sh_size); map__set_pgoff(map, shdr->sh_offset); - map__set_map_ip(map, map__dso_map_ip); - map__set_unmap_ip(map, map__dso_unmap_ip); + map__set_mapping_type(map, MAPPING_TYPE__DSO); /* Ensure maps are correctly ordered */ if (kmaps) { int err; @@ -1455,8 +1454,7 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, map__set_end(curr_map, map__start(curr_map) + shdr->sh_size); map__set_pgoff(curr_map, shdr->sh_offset); } else { - map__set_map_ip(curr_map, identity__map_ip); - map__set_unmap_ip(curr_map, identity__map_ip); + map__set_mapping_type(curr_map, MAPPING_TYPE__IDENTITY); } curr_dso->symtab_type = dso->symtab_type; if (maps__insert(kmaps, curr_map)) diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index 
a81a14769bd101bdeb207a05ec4b858ac04b8193..1da8b713509c5367b9d68d666b71cf4a1d9db6fd 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -159,9 +159,10 @@ int filename__read_build_id(const char *filename, struct build_id *bid) goto out_free; ret = read_build_id(buf, buf_size, bid, need_swap); - if (ret == 0) + if (ret == 0) { ret = bid->size; - break; + break; + } } } else { Elf64_Ehdr ehdr; @@ -210,9 +211,10 @@ int filename__read_build_id(const char *filename, struct build_id *bid) goto out_free; ret = read_build_id(buf, buf_size, bid, need_swap); - if (ret == 0) + if (ret == 0) { ret = bid->size; - break; + break; + } } } out_free: diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 82cc74b9358e0de701622378b1cffb9c0047adb6..be212ba157dc321d96534ba6063febbc1ccd5e04 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -48,11 +48,6 @@ static bool symbol__is_idle(const char *name); int vmlinux_path__nr_entries; char **vmlinux_path; -struct map_list_node { - struct list_head node; - struct map *map; -}; - struct symbol_conf symbol_conf = { .nanosecs = false, .use_modules = true, @@ -90,11 +85,6 @@ static enum dso_binary_type binary_type_symtab[] = { #define DSO_BINARY_TYPE__SYMTAB_CNT ARRAY_SIZE(binary_type_symtab) -static struct map_list_node *map_list_node__new(void) -{ - return malloc(sizeof(struct map_list_node)); -} - static bool symbol_type__filter(char symbol_type) { symbol_type = toupper(symbol_type); @@ -270,29 +260,6 @@ void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms) curr->end = roundup(curr->start, 4096) + 4096; } -void maps__fixup_end(struct maps *maps) -{ - struct map_rb_node *prev = NULL, *curr; - - down_write(maps__lock(maps)); - - maps__for_each_entry(maps, curr) { - if (prev != NULL && !map__end(prev->map)) - map__set_end(prev->map, map__start(curr->map)); - - prev = curr; - } - - /* - * We still haven't the actual symbols, so guess the - * last map final 
address. - */ - if (curr && !map__end(curr->map)) - map__set_end(curr->map, ~0ULL); - - up_write(maps__lock(maps)); -} - struct symbol *symbol__new(u64 start, u64 len, u8 binding, u8 type, const char *name) { size_t namelen = strlen(name) + 1; @@ -956,8 +923,7 @@ static int maps__split_kallsyms(struct maps *kmaps, struct dso *dso, u64 delta, return -1; } - map__set_map_ip(curr_map, identity__map_ip); - map__set_unmap_ip(curr_map, identity__map_ip); + map__set_mapping_type(curr_map, MAPPING_TYPE__IDENTITY); if (maps__insert(kmaps, curr_map)) { dso__put(ndso); return -1; @@ -1148,33 +1114,35 @@ out_delete_from: return ret; } +static int do_validate_kcore_modules_cb(struct map *old_map, void *data) +{ + struct rb_root *modules = data; + struct module_info *mi; + struct dso *dso; + + if (!__map__is_kmodule(old_map)) + return 0; + + dso = map__dso(old_map); + /* Module must be in memory at the same address */ + mi = find_module(dso->short_name, modules); + if (!mi || mi->start != map__start(old_map)) + return -EINVAL; + + return 0; +} + static int do_validate_kcore_modules(const char *filename, struct maps *kmaps) { struct rb_root modules = RB_ROOT; - struct map_rb_node *old_node; int err; err = read_proc_modules(filename, &modules); if (err) return err; - maps__for_each_entry(kmaps, old_node) { - struct map *old_map = old_node->map; - struct module_info *mi; - struct dso *dso; + err = maps__for_each_map(kmaps, do_validate_kcore_modules_cb, &modules); - if (!__map__is_kmodule(old_map)) { - continue; - } - dso = map__dso(old_map); - /* Module must be in memory at the same address */ - mi = find_module(dso->short_name, &modules); - if (!mi || mi->start != map__start(old_map)) { - err = -EINVAL; - goto out; - } - } -out: delete_modules(&modules); return err; } @@ -1271,101 +1239,15 @@ static int kcore_mapfn(u64 start, u64 len, u64 pgoff, void *data) return 0; } -/* - * Merges map into maps by splitting the new map within the existing map - * regions. 
- */ -int maps__merge_in(struct maps *kmaps, struct map *new_map) +static bool remove_old_maps(struct map *map, void *data) { - struct map_rb_node *rb_node; - LIST_HEAD(merged); - int err = 0; - - maps__for_each_entry(kmaps, rb_node) { - struct map *old_map = rb_node->map; - - /* no overload with this one */ - if (map__end(new_map) < map__start(old_map) || - map__start(new_map) >= map__end(old_map)) - continue; - - if (map__start(new_map) < map__start(old_map)) { - /* - * |new...... - * |old.... - */ - if (map__end(new_map) < map__end(old_map)) { - /* - * |new......| -> |new..| - * |old....| -> |old....| - */ - map__set_end(new_map, map__start(old_map)); - } else { - /* - * |new.............| -> |new..| |new..| - * |old....| -> |old....| - */ - struct map_list_node *m = map_list_node__new(); - - if (!m) { - err = -ENOMEM; - goto out; - } - - m->map = map__clone(new_map); - if (!m->map) { - free(m); - err = -ENOMEM; - goto out; - } - - map__set_end(m->map, map__start(old_map)); - list_add_tail(&m->node, &merged); - map__add_pgoff(new_map, map__end(old_map) - map__start(new_map)); - map__set_start(new_map, map__end(old_map)); - } - } else { - /* - * |new...... - * |old.... 
- */ - if (map__end(new_map) < map__end(old_map)) { - /* - * |new..| -> x - * |old.........| -> |old.........| - */ - map__put(new_map); - new_map = NULL; - break; - } else { - /* - * |new......| -> |new...| - * |old....| -> |old....| - */ - map__add_pgoff(new_map, map__end(old_map) - map__start(new_map)); - map__set_start(new_map, map__end(old_map)); - } - } - } - -out: - while (!list_empty(&merged)) { - struct map_list_node *old_node; - - old_node = list_entry(merged.next, struct map_list_node, node); - list_del_init(&old_node->node); - if (!err) - err = maps__insert(kmaps, old_node->map); - map__put(old_node->map); - free(old_node); - } + const struct map *map_to_save = data; - if (new_map) { - if (!err) - err = maps__insert(kmaps, new_map); - map__put(new_map); - } - return err; + /* + * We need to preserve eBPF maps even if they are covered by kcore, + * because we need to access eBPF dso for source data. + */ + return !RC_CHK_EQUAL(map, map_to_save) && !__map__is_bpf_prog(map); } static int dso__load_kcore(struct dso *dso, struct map *map, @@ -1374,7 +1256,6 @@ static int dso__load_kcore(struct dso *dso, struct map *map, struct maps *kmaps = map__kmaps(map); struct kcore_mapfn_data md; struct map *replacement_map = NULL; - struct map_rb_node *old_node, *next; struct machine *machine; bool is_64_bit; int err, fd; @@ -1421,17 +1302,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map, } /* Remove old maps */ - maps__for_each_entry_safe(kmaps, old_node, next) { - struct map *old_map = old_node->map; - - /* - * We need to preserve eBPF maps even if they are - * covered by kcore, because we need to access - * eBPF dso for source data. 
- */ - if (old_map != map && !__map__is_bpf_prog(old_map)) - maps__remove(kmaps, old_map); - } + maps__remove_maps(kmaps, remove_old_maps, map); machine->trampolines_mapped = false; /* Find the kernel map using the '_stext' symbol */ @@ -1475,8 +1346,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map, map__set_start(map, map__start(new_map)); map__set_end(map, map__end(new_map)); map__set_pgoff(map, map__pgoff(new_map)); - map__set_map_ip(map, map__map_ip_ptr(new_map)); - map__set_unmap_ip(map, map__unmap_ip_ptr(new_map)); + map__set_mapping_type(map, map__mapping_type(new_map)); /* Ensure maps are correctly ordered */ map_ref = map__get(map); maps__remove(kmaps, map_ref); @@ -2067,124 +1937,6 @@ out: return ret; } -static int map__strcmp(const void *a, const void *b) -{ - const struct map *map_a = *(const struct map **)a; - const struct map *map_b = *(const struct map **)b; - const struct dso *dso_a = map__dso(map_a); - const struct dso *dso_b = map__dso(map_b); - int ret = strcmp(dso_a->short_name, dso_b->short_name); - - if (ret == 0 && map_a != map_b) { - /* - * Ensure distinct but name equal maps have an order in part to - * aid reference counting. 
- */ - ret = (int)map__start(map_a) - (int)map__start(map_b); - if (ret == 0) - ret = (int)((intptr_t)map_a - (intptr_t)map_b); - } - - return ret; -} - -static int map__strcmp_name(const void *name, const void *b) -{ - const struct dso *dso = map__dso(*(const struct map **)b); - - return strcmp(name, dso->short_name); -} - -void __maps__sort_by_name(struct maps *maps) -{ - qsort(maps__maps_by_name(maps), maps__nr_maps(maps), sizeof(struct map *), map__strcmp); -} - -static int map__groups__sort_by_name_from_rbtree(struct maps *maps) -{ - struct map_rb_node *rb_node; - struct map **maps_by_name = realloc(maps__maps_by_name(maps), - maps__nr_maps(maps) * sizeof(struct map *)); - int i = 0; - - if (maps_by_name == NULL) - return -1; - - up_read(maps__lock(maps)); - down_write(maps__lock(maps)); - - RC_CHK_ACCESS(maps)->maps_by_name = maps_by_name; - RC_CHK_ACCESS(maps)->nr_maps_allocated = maps__nr_maps(maps); - - maps__for_each_entry(maps, rb_node) - maps_by_name[i++] = map__get(rb_node->map); - - __maps__sort_by_name(maps); - - up_write(maps__lock(maps)); - down_read(maps__lock(maps)); - - return 0; -} - -static struct map *__maps__find_by_name(struct maps *maps, const char *name) -{ - struct map **mapp; - - if (maps__maps_by_name(maps) == NULL && - map__groups__sort_by_name_from_rbtree(maps)) - return NULL; - - mapp = bsearch(name, maps__maps_by_name(maps), maps__nr_maps(maps), - sizeof(*mapp), map__strcmp_name); - if (mapp) - return *mapp; - return NULL; -} - -struct map *maps__find_by_name(struct maps *maps, const char *name) -{ - struct map_rb_node *rb_node; - struct map *map; - - down_read(maps__lock(maps)); - - - if (RC_CHK_ACCESS(maps)->last_search_by_name) { - const struct dso *dso = map__dso(RC_CHK_ACCESS(maps)->last_search_by_name); - - if (strcmp(dso->short_name, name) == 0) { - map = RC_CHK_ACCESS(maps)->last_search_by_name; - goto out_unlock; - } - } - /* - * If we have maps->maps_by_name, then the name isn't in the rbtree, - * as maps->maps_by_name 
mirrors the rbtree when lookups by name are - * made. - */ - map = __maps__find_by_name(maps, name); - if (map || maps__maps_by_name(maps) != NULL) - goto out_unlock; - - /* Fallback to traversing the rbtree... */ - maps__for_each_entry(maps, rb_node) { - struct dso *dso; - - map = rb_node->map; - dso = map__dso(map); - if (strcmp(dso->short_name, name) == 0) { - RC_CHK_ACCESS(maps)->last_search_by_name = map; - goto out_unlock; - } - } - map = NULL; - -out_unlock: - up_read(maps__lock(maps)); - return map; -} - int dso__load_vmlinux(struct dso *dso, struct map *map, const char *vmlinux, bool vmlinux_allocated) { diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index af87c46b3f89e5e5d60c3c769420e229431a95f1..071837ddce2ac7598cc674a7086666b8cf17450d 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -189,7 +189,6 @@ void __symbols__insert(struct rb_root_cached *symbols, struct symbol *sym, void symbols__insert(struct rb_root_cached *symbols, struct symbol *sym); void symbols__fixup_duplicate(struct rb_root_cached *symbols); void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms); -void maps__fixup_end(struct maps *maps); typedef int (*mapfn_t)(u64 start, u64 len, u64 pgoff, void *data); int file__read_maps(int fd, bool exe, mapfn_t mapfn, void *data, diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h index 0b589570d1d095c1a20047cad399b7eb3a19a95a..c114bbceef4013f099b05cd39ef73b7b1813750f 100644 --- a/tools/perf/util/symbol_conf.h +++ b/tools/perf/util/symbol_conf.h @@ -42,7 +42,11 @@ struct symbol_conf { inline_name, disable_add2line_warn, buildid_mmap2, - guest_code; + guest_code, + lazy_load_kernel_maps, + keep_exited_threads, + annotate_data_member, + annotate_data_sample; const char *vmlinux_name, *kallsyms_name, *source_prefix, diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 
a0579c7d7b9e9ecbe0996e8fd54a8f277be1b597..3712186353fb94109e327195d1aee6d2177763ec 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -665,18 +665,74 @@ int perf_event__synthesize_cgroups(struct perf_tool *tool __maybe_unused, } #endif +struct perf_event__synthesize_modules_maps_cb_args { + struct perf_tool *tool; + perf_event__handler_t process; + struct machine *machine; + union perf_event *event; +}; + +static int perf_event__synthesize_modules_maps_cb(struct map *map, void *data) +{ + struct perf_event__synthesize_modules_maps_cb_args *args = data; + union perf_event *event = args->event; + struct dso *dso; + size_t size; + + if (!__map__is_kmodule(map)) + return 0; + + dso = map__dso(map); + if (symbol_conf.buildid_mmap2) { + size = PERF_ALIGN(dso->long_name_len + 1, sizeof(u64)); + event->mmap2.header.type = PERF_RECORD_MMAP2; + event->mmap2.header.size = (sizeof(event->mmap2) - + (sizeof(event->mmap2.filename) - size)); + memset(event->mmap2.filename + size, 0, args->machine->id_hdr_size); + event->mmap2.header.size += args->machine->id_hdr_size; + event->mmap2.start = map__start(map); + event->mmap2.len = map__size(map); + event->mmap2.pid = args->machine->pid; + + memcpy(event->mmap2.filename, dso->long_name, dso->long_name_len + 1); + + perf_record_mmap2__read_build_id(&event->mmap2, args->machine, false); + } else { + size = PERF_ALIGN(dso->long_name_len + 1, sizeof(u64)); + event->mmap.header.type = PERF_RECORD_MMAP; + event->mmap.header.size = (sizeof(event->mmap) - + (sizeof(event->mmap.filename) - size)); + memset(event->mmap.filename + size, 0, args->machine->id_hdr_size); + event->mmap.header.size += args->machine->id_hdr_size; + event->mmap.start = map__start(map); + event->mmap.len = map__size(map); + event->mmap.pid = args->machine->pid; + + memcpy(event->mmap.filename, dso->long_name, dso->long_name_len + 1); + } + + if (perf_tool__process_synth_event(args->tool, event, args->machine, args->process) != 
0) + return -1; + + return 0; +} + int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine) { - int rc = 0; - struct map_rb_node *pos; + int rc; struct maps *maps = machine__kernel_maps(machine); - union perf_event *event; - size_t size = symbol_conf.buildid_mmap2 ? - sizeof(event->mmap2) : sizeof(event->mmap); + struct perf_event__synthesize_modules_maps_cb_args args = { + .tool = tool, + .process = process, + .machine = machine, + }; + size_t size = symbol_conf.buildid_mmap2 + ? sizeof(args.event->mmap2) + : sizeof(args.event->mmap); - event = zalloc(size + machine->id_hdr_size); - if (event == NULL) { + args.event = zalloc(size + machine->id_hdr_size); + if (args.event == NULL) { pr_debug("Not enough memory synthesizing mmap event " "for kernel modules\n"); return -1; @@ -687,53 +743,13 @@ int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t * __perf_event_mmap */ if (machine__is_host(machine)) - event->header.misc = PERF_RECORD_MISC_KERNEL; + args.event->header.misc = PERF_RECORD_MISC_KERNEL; else - event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; - - maps__for_each_entry(maps, pos) { - struct map *map = pos->map; - struct dso *dso; + args.event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; - if (!__map__is_kmodule(map)) - continue; + rc = maps__for_each_map(maps, perf_event__synthesize_modules_maps_cb, &args); - dso = map__dso(map); - if (symbol_conf.buildid_mmap2) { - size = PERF_ALIGN(dso->long_name_len + 1, sizeof(u64)); - event->mmap2.header.type = PERF_RECORD_MMAP2; - event->mmap2.header.size = (sizeof(event->mmap2) - - (sizeof(event->mmap2.filename) - size)); - memset(event->mmap2.filename + size, 0, machine->id_hdr_size); - event->mmap2.header.size += machine->id_hdr_size; - event->mmap2.start = map__start(map); - event->mmap2.len = map__size(map); - event->mmap2.pid = machine->pid; - - memcpy(event->mmap2.filename, dso->long_name, dso->long_name_len + 1); - - 
perf_record_mmap2__read_build_id(&event->mmap2, machine, false); - } else { - size = PERF_ALIGN(dso->long_name_len + 1, sizeof(u64)); - event->mmap.header.type = PERF_RECORD_MMAP; - event->mmap.header.size = (sizeof(event->mmap) - - (sizeof(event->mmap.filename) - size)); - memset(event->mmap.filename + size, 0, machine->id_hdr_size); - event->mmap.header.size += machine->id_hdr_size; - event->mmap.start = map__start(map); - event->mmap.len = map__size(map); - event->mmap.pid = machine->pid; - - memcpy(event->mmap.filename, dso->long_name, dso->long_name_len + 1); - } - - if (perf_tool__process_synth_event(tool, event, machine, process) != 0) { - rc = -1; - break; - } - } - - free(event); + free(args.event); return rc; } diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index fe5e6991ae4b496ba5a9876e3cbf22331439f953..89c47a5098e289b14e9807a4ea894e15f9e8ce4a 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -345,38 +345,36 @@ int thread__insert_map(struct thread *thread, struct map *map) if (ret) return ret; - maps__fixup_overlappings(thread__maps(thread), map, stderr); - return maps__insert(thread__maps(thread), map); + return maps__fixup_overlap_and_insert(thread__maps(thread), map); } -static int __thread__prepare_access(struct thread *thread) +struct thread__prepare_access_maps_cb_args { + int err; + struct maps *maps; +}; + +static int thread__prepare_access_maps_cb(struct map *map, void *data) { bool initialized = false; - int err = 0; - struct maps *maps = thread__maps(thread); - struct map_rb_node *rb_node; - - down_read(maps__lock(maps)); - - maps__for_each_entry(maps, rb_node) { - err = unwind__prepare_access(thread__maps(thread), rb_node->map, &initialized); - if (err || initialized) - break; - } + struct thread__prepare_access_maps_cb_args *args = data; - up_read(maps__lock(maps)); + args->err = unwind__prepare_access(args->maps, map, &initialized); - return err; + return (args->err || initialized) ? 
1 : 0; } static int thread__prepare_access(struct thread *thread) { - int err = 0; + struct thread__prepare_access_maps_cb_args args = { + .err = 0, + }; - if (dwarf_callchain_users) - err = __thread__prepare_access(thread); + if (dwarf_callchain_users) { + args.maps = thread__maps(thread); + maps__for_each_map(thread__maps(thread), thread__prepare_access_maps_cb, &args); + } - return err; + return args.err; } static int thread__clone_maps(struct thread *thread, struct thread *parent, bool do_maps_clone) @@ -385,14 +383,14 @@ static int thread__clone_maps(struct thread *thread, struct thread *parent, bool if (thread__pid(thread) == thread__pid(parent)) return thread__prepare_access(thread); - if (thread__maps(thread) == thread__maps(parent)) { + if (RC_CHK_EQUAL(thread__maps(thread), thread__maps(parent))) { pr_debug("broken map groups on thread %d/%d parent %d/%d\n", thread__pid(thread), thread__tid(thread), thread__pid(parent), thread__tid(parent)); return 0; } /* But this one is new process, copy maps. */ - return do_maps_clone ? maps__clone(thread, thread__maps(parent)) : 0; + return do_maps_clone ? maps__copy_from(thread__maps(thread), thread__maps(parent)) : 0; } int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone) diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index e79225a0ea46b7897700775f6330b4c6d91763c4..0df775b5c1105d75d74192d2ae994d99dc5f001b 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -36,13 +36,22 @@ struct thread_rb_node { }; DECLARE_RC_STRUCT(thread) { + /** @maps: mmaps associated with this thread. */ struct maps *maps; pid_t pid_; /* Not all tools update this */ + /** @tid: thread ID number unique to a machine. */ pid_t tid; + /** @ppid: parent process of the process this thread belongs to. */ pid_t ppid; int cpu; int guest_cpu; /* For QEMU thread */ refcount_t refcnt; + /** + * @exited: Has the thread had an exit event. 
Such threads are usually + * removed from the machine's threads but some events/tools require + * access to dead threads. + */ + bool exited; bool comm_set; int comm_len; struct list_head namespaces_list; @@ -189,6 +198,11 @@ static inline refcount_t *thread__refcnt(struct thread *thread) return &RC_CHK_ACCESS(thread)->refcnt; } +static inline void thread__set_exited(struct thread *thread, bool exited) +{ + RC_CHK_ACCESS(thread)->exited = exited; +} + static inline bool thread__comm_set(const struct thread *thread) { return RC_CHK_ACCESS(thread)->comm_set; diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c index be7157de045187b1be4ca4f10ef864eb1b023ec7..4db3d1bd686cf399757edb25ebf671958ba25f63 100644 --- a/tools/perf/util/top.c +++ b/tools/perf/util/top.c @@ -28,6 +28,7 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) struct record_opts *opts = &top->record_opts; struct target *target = &opts->target; size_t ret = 0; + int nr_cpus; if (top->samples) { samples_per_sec = top->samples / top->delay_secs; @@ -93,19 +94,17 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) else ret += SNPRINTF(bf + ret, size - ret, " (all"); + nr_cpus = perf_cpu_map__nr(top->evlist->core.user_requested_cpus); if (target->cpu_list) ret += SNPRINTF(bf + ret, size - ret, ", CPU%s: %s)", - perf_cpu_map__nr(top->evlist->core.user_requested_cpus) > 1 - ? "s" : "", + nr_cpus > 1 ? "s" : "", target->cpu_list); else { if (target->tid) ret += SNPRINTF(bf + ret, size - ret, ")"); else ret += SNPRINTF(bf + ret, size - ret, ", %d CPU%s)", - perf_cpu_map__nr(top->evlist->core.user_requested_cpus), - perf_cpu_map__nr(top->evlist->core.user_requested_cpus) > 1 - ? "s" : ""); + nr_cpus, nr_cpus > 1 ? 
"s" : ""); } perf_top__reset_sample_counters(top); diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index a8b0d79bd96cfa36be55dde1995ed8e129b3d732..4c5588dbb1317d38fcbddb21d241becd61771706 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h @@ -21,7 +21,6 @@ struct perf_top { struct perf_tool tool; struct evlist *evlist, *sb_evlist; struct record_opts record_opts; - struct annotation_options annotation_opts; struct evswitch evswitch; /* * Symbols will be added here in perf_event__process_sample and will diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index 8554db3fc0d7c9fb6523032e6257e3d49abd64b7..6013335a8daea58a4fe19c0b4a5041e522f6707d 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -46,6 +46,7 @@ static int __report_module(struct addr_location *al, u64 ip, { Dwfl_Module *mod; struct dso *dso = NULL; + Dwarf_Addr base; /* * Some callers will use al->sym, so we can't just use the * cheaper thread__find_map() here. @@ -58,13 +59,25 @@ static int __report_module(struct addr_location *al, u64 ip, if (!dso) return 0; + /* + * The generated JIT DSO files only map the code segment without + * ELF headers. Since JIT codes used to be packed in a memory + * segment, calculating the base address using pgoff falls into + * a different code in another DSO. So just use the map->start + * directly to pick the correct one. 
+ */ + if (!strncmp(dso->long_name, "/tmp/jitted-", 12)) + base = map__start(al->map); + else + base = map__start(al->map) - map__pgoff(al->map); + mod = dwfl_addrmodule(ui->dwfl, ip); if (mod) { Dwarf_Addr s; dwfl_module_info(mod, NULL, &s, NULL, NULL, NULL, NULL, NULL); - if (s != map__start(al->map) - map__pgoff(al->map)) - mod = 0; + if (s != base) + mod = NULL; } if (!mod) { @@ -72,14 +85,14 @@ static int __report_module(struct addr_location *al, u64 ip, __symbol__join_symfs(filename, sizeof(filename), dso->long_name); mod = dwfl_report_elf(ui->dwfl, dso->short_name, filename, -1, - map__start(al->map) - map__pgoff(al->map), false); + base, false); } if (!mod) { char filename[PATH_MAX]; if (dso__build_id_filename(dso, filename, sizeof(filename), false)) mod = dwfl_report_elf(ui->dwfl, dso->short_name, filename, -1, - map__start(al->map) - map__pgoff(al->map), false); + base, false); } if (mod) { diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index c0641882fd2fd7eef7991bc7b83fee0d3d03009b..dac536e28360a2481956360ec1753d2079488925 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -302,12 +302,31 @@ static int unwind_spec_ehframe(struct dso *dso, struct machine *machine, return 0; } +struct read_unwind_spec_eh_frame_maps_cb_args { + struct dso *dso; + u64 base_addr; +}; + +static int read_unwind_spec_eh_frame_maps_cb(struct map *map, void *data) +{ + + struct read_unwind_spec_eh_frame_maps_cb_args *args = data; + + if (map__dso(map) == args->dso && map__start(map) - map__pgoff(map) < args->base_addr) + args->base_addr = map__start(map) - map__pgoff(map); + + return 0; +} + + static int read_unwind_spec_eh_frame(struct dso *dso, struct unwind_info *ui, u64 *table_data, u64 *segbase, u64 *fde_count) { - struct map_rb_node *map_node; - u64 base_addr = UINT64_MAX; + struct read_unwind_spec_eh_frame_maps_cb_args args = { + .dso = dso, + .base_addr = UINT64_MAX, + }; 
int ret, fd; if (dso->data.eh_frame_hdr_offset == 0) { @@ -325,16 +344,11 @@ static int read_unwind_spec_eh_frame(struct dso *dso, struct unwind_info *ui, return -EINVAL; } - maps__for_each_entry(thread__maps(ui->thread), map_node) { - struct map *map = map_node->map; - u64 start = map__start(map); + maps__for_each_map(thread__maps(ui->thread), read_unwind_spec_eh_frame_maps_cb, &args); - if (map__dso(map) == dso && start < base_addr) - base_addr = start; - } - base_addr -= dso->data.elf_base_addr; + args.base_addr -= dso->data.elf_base_addr; /* Address of .eh_frame_hdr */ - *segbase = base_addr + dso->data.eh_frame_hdr_addr; + *segbase = args.base_addr + dso->data.eh_frame_hdr_addr; ret = unwind_spec_ehframe(dso, ui->machine, dso->data.eh_frame_hdr_offset, table_data, fde_count); if (ret) diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c index ae3eee69b659c849ee4eb6ff88330d24d29852e5..df8963796187dc69515114aff048a7a757299a7d 100644 --- a/tools/perf/util/vdso.c +++ b/tools/perf/util/vdso.c @@ -140,23 +140,34 @@ static struct dso *__machine__addnew_vdso(struct machine *machine, const char *s return dso; } +struct machine__thread_dso_type_maps_cb_args { + struct machine *machine; + enum dso_type dso_type; +}; + +static int machine__thread_dso_type_maps_cb(struct map *map, void *data) +{ + struct machine__thread_dso_type_maps_cb_args *args = data; + struct dso *dso = map__dso(map); + + if (!dso || dso->long_name[0] != '/') + return 0; + + args->dso_type = dso__type(dso, args->machine); + return (args->dso_type != DSO__TYPE_UNKNOWN) ? 
1 : 0; +} + static enum dso_type machine__thread_dso_type(struct machine *machine, struct thread *thread) { - enum dso_type dso_type = DSO__TYPE_UNKNOWN; - struct map_rb_node *rb_node; - - maps__for_each_entry(thread__maps(thread), rb_node) { - struct dso *dso = map__dso(rb_node->map); + struct machine__thread_dso_type_maps_cb_args args = { + .machine = machine, + .dso_type = DSO__TYPE_UNKNOWN, + }; - if (!dso || dso->long_name[0] != '/') - continue; - dso_type = dso__type(dso, machine); - if (dso_type != DSO__TYPE_UNKNOWN) - break; - } + maps__for_each_map(thread__maps(thread), machine__thread_dso_type_maps_cb, &args); - return dso_type; + return args.dso_type; } #if BITS_PER_LONG == 64 diff --git a/tools/perf/util/zstd.c b/tools/perf/util/zstd.c index 48dd2b018c47a7fcbe4a5e78e3d43bfc03d349b2..57027e0ac7b658a82ecd4ebd3153dfb3f8c1daad 100644 --- a/tools/perf/util/zstd.c +++ b/tools/perf/util/zstd.c @@ -7,35 +7,9 @@ int zstd_init(struct zstd_data *data, int level) { - size_t ret; - - data->dstream = ZSTD_createDStream(); - if (data->dstream == NULL) { - pr_err("Couldn't create decompression stream.\n"); - return -1; - } - - ret = ZSTD_initDStream(data->dstream); - if (ZSTD_isError(ret)) { - pr_err("Failed to initialize decompression stream: %s\n", ZSTD_getErrorName(ret)); - return -1; - } - - if (!level) - return 0; - - data->cstream = ZSTD_createCStream(); - if (data->cstream == NULL) { - pr_err("Couldn't create compression stream.\n"); - return -1; - } - - ret = ZSTD_initCStream(data->cstream, level); - if (ZSTD_isError(ret)) { - pr_err("Failed to initialize compression stream: %s\n", ZSTD_getErrorName(ret)); - return -1; - } - + data->comp_level = level; + data->dstream = NULL; + data->cstream = NULL; return 0; } @@ -54,7 +28,7 @@ int zstd_fini(struct zstd_data *data) return 0; } -size_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t dst_size, +ssize_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t dst_size, 
void *src, size_t src_size, size_t max_record_size, size_t process_header(void *record, size_t increment)) { @@ -63,6 +37,21 @@ size_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t ZSTD_outBuffer output; void *record; + if (!data->cstream) { + data->cstream = ZSTD_createCStream(); + if (data->cstream == NULL) { + pr_err("Couldn't create compression stream.\n"); + return -1; + } + + ret = ZSTD_initCStream(data->cstream, data->comp_level); + if (ZSTD_isError(ret)) { + pr_err("Failed to initialize compression stream: %s\n", + ZSTD_getErrorName(ret)); + return -1; + } + } + while (input.pos < input.size) { record = dst; size = process_header(record, 0); @@ -96,6 +85,20 @@ size_t zstd_decompress_stream(struct zstd_data *data, void *src, size_t src_size ZSTD_inBuffer input = { src, src_size, 0 }; ZSTD_outBuffer output = { dst, dst_size, 0 }; + if (!data->dstream) { + data->dstream = ZSTD_createDStream(); + if (data->dstream == NULL) { + pr_err("Couldn't create decompression stream.\n"); + return 0; + } + + ret = ZSTD_initDStream(data->dstream); + if (ZSTD_isError(ret)) { + pr_err("Failed to initialize decompression stream: %s\n", + ZSTD_getErrorName(ret)); + return 0; + } + } while (input.pos < input.size) { ret = ZSTD_decompressStream(data->dstream, &output, &input); if (ZSTD_isError(ret)) { diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 95dc58b94178bf00b447b04440644bba1cd2209d..0b12c36902d82ddf23d8d71ac5067e282f1b1564 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -58,6 +58,7 @@ cxl_core-y += $(CXL_CORE_SRC)/mbox.o cxl_core-y += $(CXL_CORE_SRC)/pci.o cxl_core-y += $(CXL_CORE_SRC)/hdm.o cxl_core-y += $(CXL_CORE_SRC)/pmu.o +cxl_core-y += $(CXL_CORE_SRC)/cdat.o cxl_core-$(CONFIG_TRACING) += $(CXL_CORE_SRC)/trace.o cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o cxl_core-y += config_check.o diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index 
f4e517a0c7740ffa2dfb4889231d42fad438a5a9..a3cdbb2be038c45e27326925d81ba43294b56c31 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -68,15 +68,19 @@ static struct acpi_device acpi0017_mock; static struct acpi_device host_bridge[NR_BRIDGES] = { [0] = { .handle = &host_bridge[0], + .pnp.unique_id = "0", }, [1] = { .handle = &host_bridge[1], + .pnp.unique_id = "1", }, [2] = { .handle = &host_bridge[2], + .pnp.unique_id = "2", }, [3] = { .handle = &host_bridge[3], + .pnp.unique_id = "3", }, }; diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index ee61fa3a2411f8c2acc7272a20252fad95a8a811..35ee41e435ab3a531187b57fd0ea8f2291a8435d 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -251,7 +251,8 @@ static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd) for (i = 0; i < CXL_TEST_EVENT_CNT && !event_log_empty(log); i++) { memcpy(&pl->records[i], event_get_current(log), sizeof(pl->records[i])); - pl->records[i].hdr.handle = event_get_cur_event_handle(log); + pl->records[i].event.generic.hdr.handle = + event_get_cur_event_handle(log); log->cur_idx++; } @@ -337,87 +338,109 @@ static void cxl_mock_event_trigger(struct device *dev) } struct cxl_event_record_raw maint_needed = { - .hdr = { - .id = UUID_INIT(0xBA5EBA11, 0xABCD, 0xEFEB, - 0xa5, 0x5a, 0xa5, 0x5a, 0xa5, 0xa5, 0x5a, 0xa5), - .length = sizeof(struct cxl_event_record_raw), - .flags[0] = CXL_EVENT_RECORD_FLAG_MAINT_NEEDED, - /* .handle = Set dynamically */ - .related_handle = cpu_to_le16(0xa5b6), + .id = UUID_INIT(0xBA5EBA11, 0xABCD, 0xEFEB, + 0xa5, 0x5a, 0xa5, 0x5a, 0xa5, 0xa5, 0x5a, 0xa5), + .event.generic = { + .hdr = { + .length = sizeof(struct cxl_event_record_raw), + .flags[0] = CXL_EVENT_RECORD_FLAG_MAINT_NEEDED, + /* .handle = Set dynamically */ + .related_handle = cpu_to_le16(0xa5b6), + }, + .data = { 0xDE, 0xAD, 0xBE, 0xEF }, }, - .data = { 0xDE, 0xAD, 0xBE, 0xEF }, }; struct cxl_event_record_raw hardware_replace 
= { - .hdr = { - .id = UUID_INIT(0xABCDEFEB, 0xBA11, 0xBA5E, - 0xa5, 0x5a, 0xa5, 0x5a, 0xa5, 0xa5, 0x5a, 0xa5), - .length = sizeof(struct cxl_event_record_raw), - .flags[0] = CXL_EVENT_RECORD_FLAG_HW_REPLACE, - /* .handle = Set dynamically */ - .related_handle = cpu_to_le16(0xb6a5), + .id = UUID_INIT(0xABCDEFEB, 0xBA11, 0xBA5E, + 0xa5, 0x5a, 0xa5, 0x5a, 0xa5, 0xa5, 0x5a, 0xa5), + .event.generic = { + .hdr = { + .length = sizeof(struct cxl_event_record_raw), + .flags[0] = CXL_EVENT_RECORD_FLAG_HW_REPLACE, + /* .handle = Set dynamically */ + .related_handle = cpu_to_le16(0xb6a5), + }, + .data = { 0xDE, 0xAD, 0xBE, 0xEF }, }, - .data = { 0xDE, 0xAD, 0xBE, 0xEF }, }; -struct cxl_event_gen_media gen_media = { - .hdr = { - .id = UUID_INIT(0xfbcd0a77, 0xc260, 0x417f, - 0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6), - .length = sizeof(struct cxl_event_gen_media), - .flags[0] = CXL_EVENT_RECORD_FLAG_PERMANENT, - /* .handle = Set dynamically */ - .related_handle = cpu_to_le16(0), +struct cxl_test_gen_media { + uuid_t id; + struct cxl_event_gen_media rec; +} __packed; + +struct cxl_test_gen_media gen_media = { + .id = CXL_EVENT_GEN_MEDIA_UUID, + .rec = { + .hdr = { + .length = sizeof(struct cxl_test_gen_media), + .flags[0] = CXL_EVENT_RECORD_FLAG_PERMANENT, + /* .handle = Set dynamically */ + .related_handle = cpu_to_le16(0), + }, + .phys_addr = cpu_to_le64(0x2000), + .descriptor = CXL_GMER_EVT_DESC_UNCORECTABLE_EVENT, + .type = CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR, + .transaction_type = CXL_GMER_TRANS_HOST_WRITE, + /* .validity_flags = */ + .channel = 1, + .rank = 30 }, - .phys_addr = cpu_to_le64(0x2000), - .descriptor = CXL_GMER_EVT_DESC_UNCORECTABLE_EVENT, - .type = CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR, - .transaction_type = CXL_GMER_TRANS_HOST_WRITE, - /* .validity_flags = */ - .channel = 1, - .rank = 30 }; -struct cxl_event_dram dram = { - .hdr = { - .id = UUID_INIT(0x601dcbb3, 0x9c06, 0x4eab, - 0xb8, 0xaf, 0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24), - .length = 
sizeof(struct cxl_event_dram), - .flags[0] = CXL_EVENT_RECORD_FLAG_PERF_DEGRADED, - /* .handle = Set dynamically */ - .related_handle = cpu_to_le16(0), +struct cxl_test_dram { + uuid_t id; + struct cxl_event_dram rec; +} __packed; + +struct cxl_test_dram dram = { + .id = CXL_EVENT_DRAM_UUID, + .rec = { + .hdr = { + .length = sizeof(struct cxl_test_dram), + .flags[0] = CXL_EVENT_RECORD_FLAG_PERF_DEGRADED, + /* .handle = Set dynamically */ + .related_handle = cpu_to_le16(0), + }, + .phys_addr = cpu_to_le64(0x8000), + .descriptor = CXL_GMER_EVT_DESC_THRESHOLD_EVENT, + .type = CXL_GMER_MEM_EVT_TYPE_INV_ADDR, + .transaction_type = CXL_GMER_TRANS_INTERNAL_MEDIA_SCRUB, + /* .validity_flags = */ + .channel = 1, + .bank_group = 5, + .bank = 2, + .column = {0xDE, 0xAD}, }, - .phys_addr = cpu_to_le64(0x8000), - .descriptor = CXL_GMER_EVT_DESC_THRESHOLD_EVENT, - .type = CXL_GMER_MEM_EVT_TYPE_INV_ADDR, - .transaction_type = CXL_GMER_TRANS_INTERNAL_MEDIA_SCRUB, - /* .validity_flags = */ - .channel = 1, - .bank_group = 5, - .bank = 2, - .column = {0xDE, 0xAD}, }; -struct cxl_event_mem_module mem_module = { - .hdr = { - .id = UUID_INIT(0xfe927475, 0xdd59, 0x4339, - 0xa5, 0x86, 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74), - .length = sizeof(struct cxl_event_mem_module), - /* .handle = Set dynamically */ - .related_handle = cpu_to_le16(0), +struct cxl_test_mem_module { + uuid_t id; + struct cxl_event_mem_module rec; +} __packed; + +struct cxl_test_mem_module mem_module = { + .id = CXL_EVENT_MEM_MODULE_UUID, + .rec = { + .hdr = { + .length = sizeof(struct cxl_test_mem_module), + /* .handle = Set dynamically */ + .related_handle = cpu_to_le16(0), + }, + .event_type = CXL_MMER_TEMP_CHANGE, + .info = { + .health_status = CXL_DHI_HS_PERFORMANCE_DEGRADED, + .media_status = CXL_DHI_MS_ALL_DATA_LOST, + .add_status = (CXL_DHI_AS_CRITICAL << 2) | + (CXL_DHI_AS_WARNING << 4) | + (CXL_DHI_AS_WARNING << 5), + .device_temp = { 0xDE, 0xAD}, + .dirty_shutdown_cnt = { 0xde, 0xad, 0xbe, 0xef }, + 
.cor_vol_err_cnt = { 0xde, 0xad, 0xbe, 0xef }, + .cor_per_err_cnt = { 0xde, 0xad, 0xbe, 0xef }, + } }, - .event_type = CXL_MMER_TEMP_CHANGE, - .info = { - .health_status = CXL_DHI_HS_PERFORMANCE_DEGRADED, - .media_status = CXL_DHI_MS_ALL_DATA_LOST, - .add_status = (CXL_DHI_AS_CRITICAL << 2) | - (CXL_DHI_AS_WARNING << 4) | - (CXL_DHI_AS_WARNING << 5), - .device_temp = { 0xDE, 0xAD}, - .dirty_shutdown_cnt = { 0xde, 0xad, 0xbe, 0xef }, - .cor_vol_err_cnt = { 0xde, 0xad, 0xbe, 0xef }, - .cor_per_err_cnt = { 0xde, 0xad, 0xbe, 0xef }, - } }; static int mock_set_timestamp(struct cxl_dev_state *cxlds, @@ -439,11 +462,11 @@ static int mock_set_timestamp(struct cxl_dev_state *cxlds, static void cxl_mock_add_event_logs(struct mock_event_store *mes) { put_unaligned_le16(CXL_GMER_VALID_CHANNEL | CXL_GMER_VALID_RANK, - &gen_media.validity_flags); + &gen_media.rec.validity_flags); put_unaligned_le16(CXL_DER_VALID_CHANNEL | CXL_DER_VALID_BANK_GROUP | CXL_DER_VALID_BANK | CXL_DER_VALID_COLUMN, - &dram.validity_flags); + &dram.rec.validity_flags); mes_add_event(mes, CXL_EVENT_TYPE_INFO, &maint_needed); mes_add_event(mes, CXL_EVENT_TYPE_INFO, diff --git a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c new file mode 100644 index 0000000000000000000000000000000000000000..0c365f36c73b56b4de5665050c9214af56ebd97f --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2024 Meta + +#include +#include "network_helpers.h" +#include "sock_iter_batch.skel.h" + +#define TEST_NS "sock_iter_batch_netns" + +static const int nr_soreuse = 4; + +static void do_test(int sock_type, bool onebyone) +{ + int err, i, nread, to_read, total_read, iter_fd = -1; + int first_idx, second_idx, indices[nr_soreuse]; + struct bpf_link *link = NULL; + struct sock_iter_batch *skel; + int *fds[2] = {}; + + skel = sock_iter_batch__open(); + if 
(!ASSERT_OK_PTR(skel, "sock_iter_batch__open")) + return; + + /* Prepare 2 buckets of sockets in the kernel hashtable */ + for (i = 0; i < ARRAY_SIZE(fds); i++) { + int local_port; + + fds[i] = start_reuseport_server(AF_INET6, sock_type, "::1", 0, 0, + nr_soreuse); + if (!ASSERT_OK_PTR(fds[i], "start_reuseport_server")) + goto done; + local_port = get_socket_local_port(*fds[i]); + if (!ASSERT_GE(local_port, 0, "get_socket_local_port")) + goto done; + skel->rodata->ports[i] = ntohs(local_port); + } + + err = sock_iter_batch__load(skel); + if (!ASSERT_OK(err, "sock_iter_batch__load")) + goto done; + + link = bpf_program__attach_iter(sock_type == SOCK_STREAM ? + skel->progs.iter_tcp_soreuse : + skel->progs.iter_udp_soreuse, + NULL); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_iter")) + goto done; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (!ASSERT_GE(iter_fd, 0, "bpf_iter_create")) + goto done; + + /* Test reading a bucket (either from fds[0] or fds[1]). + * Only read "nr_soreuse - 1" number of sockets + * from a bucket and leave one socket out from + * that bucket on purpose. + */ + to_read = (nr_soreuse - 1) * sizeof(*indices); + total_read = 0; + first_idx = -1; + do { + nread = read(iter_fd, indices, onebyone ? sizeof(*indices) : to_read); + if (nread <= 0 || nread % sizeof(*indices)) + break; + total_read += nread; + + if (first_idx == -1) + first_idx = indices[0]; + for (i = 0; i < nread / sizeof(*indices); i++) + ASSERT_EQ(indices[i], first_idx, "first_idx"); + } while (total_read < to_read); + ASSERT_EQ(nread, onebyone ? sizeof(*indices) : to_read, "nread"); + ASSERT_EQ(total_read, to_read, "total_read"); + + free_fds(fds[first_idx], nr_soreuse); + fds[first_idx] = NULL; + + /* Read the "whole" second bucket */ + to_read = nr_soreuse * sizeof(*indices); + total_read = 0; + second_idx = !first_idx; + do { + nread = read(iter_fd, indices, onebyone ? 
sizeof(*indices) : to_read); + if (nread <= 0 || nread % sizeof(*indices)) + break; + total_read += nread; + + for (i = 0; i < nread / sizeof(*indices); i++) + ASSERT_EQ(indices[i], second_idx, "second_idx"); + } while (total_read <= to_read); + ASSERT_EQ(nread, 0, "nread"); + /* Both so_reuseport ports should be in different buckets, so + * total_read must equal to the expected to_read. + * + * For a very unlikely case, both ports collide at the same bucket, + * the bucket offset (i.e. 3) will be skipped and it cannot + * expect the to_read number of bytes. + */ + if (skel->bss->bucket[0] != skel->bss->bucket[1]) + ASSERT_EQ(total_read, to_read, "total_read"); + +done: + for (i = 0; i < ARRAY_SIZE(fds); i++) + free_fds(fds[i], nr_soreuse); + if (iter_fd < 0) + close(iter_fd); + bpf_link__destroy(link); + sock_iter_batch__destroy(skel); +} + +void test_sock_iter_batch(void) +{ + struct nstoken *nstoken = NULL; + + SYS_NOFAIL("ip netns del " TEST_NS " &> /dev/null"); + SYS(done, "ip netns add %s", TEST_NS); + SYS(done, "ip -net %s link set dev lo up", TEST_NS); + + nstoken = open_netns(TEST_NS); + if (!ASSERT_OK_PTR(nstoken, "open_netns")) + goto done; + + if (test__start_subtest("tcp")) { + do_test(SOCK_STREAM, true); + do_test(SOCK_STREAM, false); + } + if (test__start_subtest("udp")) { + do_test(SOCK_DGRAM, true); + do_test(SOCK_DGRAM, false); + } + close_netns(nstoken); + +done: + SYS_NOFAIL("ip netns del " TEST_NS " &> /dev/null"); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c index 67d4ef9e62b378de8888b326495514f26d7b169d..e905cbaf6b3d109c1ff68f7763a6b1cf1c0f4c17 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c +++ b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c @@ -47,6 +47,19 @@ static void subtest_ctx_arg_rewrite(void) struct btf *btf = NULL; __u32 info_len = sizeof(info); int err, fd, i; + struct btf *kern_btf = NULL; + + kern_btf = 
btf__load_vmlinux_btf(); + if (!ASSERT_OK_PTR(kern_btf, "kern_btf_load")) + return; + + /* simple detection of kernel native arg:ctx tag support */ + if (btf__find_by_name_kind(kern_btf, "bpf_subprog_arg_info", BTF_KIND_STRUCT) > 0) { + test__skip(); + btf__free(kern_btf); + return; + } + btf__free(kern_btf); skel = test_global_func_ctx_args__open(); if (!ASSERT_OK_PTR(skel, "skel_open")) diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h index 1bdc680b0e0e26891ee045d19caef68e4133570f..e8bd4b7b5ef7695c7fa9176cb447504e36a9dfaa 100644 --- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h +++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h @@ -72,6 +72,8 @@ #define inet_rcv_saddr sk.__sk_common.skc_rcv_saddr #define inet_dport sk.__sk_common.skc_dport +#define udp_portaddr_hash inet.sk.__sk_common.skc_u16hashes[1] + #define ir_loc_addr req.__req_common.skc_rcv_saddr #define ir_num req.__req_common.skc_num #define ir_rmt_addr req.__req_common.skc_daddr @@ -85,6 +87,7 @@ #define sk_rmem_alloc sk_backlog.rmem_alloc #define sk_refcnt __sk_common.skc_refcnt #define sk_state __sk_common.skc_state +#define sk_net __sk_common.skc_net #define sk_v6_daddr __sk_common.skc_v6_daddr #define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr #define sk_flags __sk_common.skc_flags diff --git a/tools/testing/selftests/bpf/progs/sock_iter_batch.c b/tools/testing/selftests/bpf/progs/sock_iter_batch.c new file mode 100644 index 0000000000000000000000000000000000000000..ffbbfe1fa1c1e335b574d1b1a93ee92be9d15eef --- /dev/null +++ b/tools/testing/selftests/bpf/progs/sock_iter_batch.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2024 Meta + +#include "vmlinux.h" +#include +#include +#include +#include "bpf_tracing_net.h" +#include "bpf_kfuncs.h" + +#define ATTR __always_inline +#include "test_jhash.h" + +static bool ipv6_addr_loopback(const struct in6_addr *a) +{ + return (a->s6_addr32[0] | 
a->s6_addr32[1] | + a->s6_addr32[2] | (a->s6_addr32[3] ^ bpf_htonl(1))) == 0; +} + +volatile const __u16 ports[2]; +unsigned int bucket[2]; + +SEC("iter/tcp") +int iter_tcp_soreuse(struct bpf_iter__tcp *ctx) +{ + struct sock *sk = (struct sock *)ctx->sk_common; + struct inet_hashinfo *hinfo; + unsigned int hash; + struct net *net; + int idx; + + if (!sk) + return 0; + + sk = bpf_rdonly_cast(sk, bpf_core_type_id_kernel(struct sock)); + if (sk->sk_family != AF_INET6 || + sk->sk_state != TCP_LISTEN || + !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr)) + return 0; + + if (sk->sk_num == ports[0]) + idx = 0; + else if (sk->sk_num == ports[1]) + idx = 1; + else + return 0; + + /* bucket selection as in inet_lhash2_bucket_sk() */ + net = sk->sk_net.net; + hash = jhash2(sk->sk_v6_rcv_saddr.s6_addr32, 4, net->hash_mix); + hash ^= sk->sk_num; + hinfo = net->ipv4.tcp_death_row.hashinfo; + bucket[idx] = hash & hinfo->lhash2_mask; + bpf_seq_write(ctx->meta->seq, &idx, sizeof(idx)); + + return 0; +} + +#define udp_sk(ptr) container_of(ptr, struct udp_sock, inet.sk) + +SEC("iter/udp") +int iter_udp_soreuse(struct bpf_iter__udp *ctx) +{ + struct sock *sk = (struct sock *)ctx->udp_sk; + struct udp_table *udptable; + int idx; + + if (!sk) + return 0; + + sk = bpf_rdonly_cast(sk, bpf_core_type_id_kernel(struct sock)); + if (sk->sk_family != AF_INET6 || + !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr)) + return 0; + + if (sk->sk_num == ports[0]) + idx = 0; + else if (sk->sk_num == ports[1]) + idx = 1; + else + return 0; + + /* bucket selection as in udp_hashslot2() */ + udptable = sk->sk_net.net->ipv4.udp_table; + bucket[idx] = udp_sk(sk)->udp_portaddr_hash & udptable->mask; + bpf_seq_write(ctx->meta->seq, &idx, sizeof(idx)); + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_jhash.h b/tools/testing/selftests/bpf/progs/test_jhash.h index c300734d26f63c8a6305d3cff333ee3fc83d8f50..ef53559bbbdf12dd63e7743e776b04ed800a117e 100644 --- 
a/tools/testing/selftests/bpf/progs/test_jhash.h +++ b/tools/testing/selftests/bpf/progs/test_jhash.h @@ -69,3 +69,34 @@ u32 jhash(const void *key, u32 length, u32 initval) return c; } + +static __always_inline u32 jhash2(const u32 *k, u32 length, u32 initval) +{ + u32 a, b, c; + + /* Set up the internal state */ + a = b = c = JHASH_INITVAL + (length<<2) + initval; + + /* Handle most of the key */ + while (length > 3) { + a += k[0]; + b += k[1]; + c += k[2]; + __jhash_mix(a, b, c); + length -= 3; + k += 3; + } + + /* Handle the last 3 u32's */ + switch (length) { + case 3: c += k[2]; + case 2: b += k[1]; + case 1: a += k[0]; + __jhash_final(a, b, c); + break; + case 0: /* Nothing left to add */ + break; + } + + return c; +} diff --git a/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c index 9eeb2d89cda884171789581de5fa94138ca6b79d..67dddd9418911cb1ce3db26132fab340e93aab4d 100644 --- a/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c +++ b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c @@ -3,6 +3,7 @@ #include #include +#include #include "bpf_misc.h" #include "xdp_metadata.h" #include "bpf_kfuncs.h" @@ -138,25 +139,182 @@ __weak int subprog_ctx_tag(void *ctx __arg_ctx) return bpf_get_stack(ctx, stack, sizeof(stack), 0); } +__weak int raw_tp_canonical(struct bpf_raw_tracepoint_args *ctx __arg_ctx) +{ + return 0; +} + +__weak int raw_tp_u64_array(u64 *ctx __arg_ctx) +{ + return 0; +} + SEC("?raw_tp") __success __log_level(2) int arg_tag_ctx_raw_tp(void *ctx) { - return subprog_ctx_tag(ctx); + return subprog_ctx_tag(ctx) + raw_tp_canonical(ctx) + raw_tp_u64_array(ctx); +} + +SEC("?raw_tp.w") +__success __log_level(2) +int arg_tag_ctx_raw_tp_writable(void *ctx) +{ + return subprog_ctx_tag(ctx) + raw_tp_canonical(ctx) + raw_tp_u64_array(ctx); +} + +SEC("?tp_btf/sys_enter") +__success __log_level(2) +int arg_tag_ctx_raw_tp_btf(void *ctx) +{ + return subprog_ctx_tag(ctx) + 
raw_tp_canonical(ctx) + raw_tp_u64_array(ctx); +} + +struct whatever { }; + +__weak int tp_whatever(struct whatever *ctx __arg_ctx) +{ + return 0; } SEC("?tp") __success __log_level(2) int arg_tag_ctx_tp(void *ctx) { - return subprog_ctx_tag(ctx); + return subprog_ctx_tag(ctx) + tp_whatever(ctx); +} + +__weak int kprobe_subprog_pt_regs(struct pt_regs *ctx __arg_ctx) +{ + return 0; +} + +__weak int kprobe_subprog_typedef(bpf_user_pt_regs_t *ctx __arg_ctx) +{ + return 0; } SEC("?kprobe") __success __log_level(2) int arg_tag_ctx_kprobe(void *ctx) { - return subprog_ctx_tag(ctx); + return subprog_ctx_tag(ctx) + + kprobe_subprog_pt_regs(ctx) + + kprobe_subprog_typedef(ctx); +} + +__weak int perf_subprog_regs( +#if defined(bpf_target_riscv) + struct user_regs_struct *ctx __arg_ctx +#elif defined(bpf_target_s390) + /* user_pt_regs typedef is anonymous struct, so only `void *` works */ + void *ctx __arg_ctx +#elif defined(bpf_target_loongarch) || defined(bpf_target_arm64) || defined(bpf_target_powerpc) + struct user_pt_regs *ctx __arg_ctx +#else + struct pt_regs *ctx __arg_ctx +#endif +) +{ + return 0; +} + +__weak int perf_subprog_typedef(bpf_user_pt_regs_t *ctx __arg_ctx) +{ + return 0; +} + +__weak int perf_subprog_canonical(struct bpf_perf_event_data *ctx __arg_ctx) +{ + return 0; +} + +SEC("?perf_event") +__success __log_level(2) +int arg_tag_ctx_perf(void *ctx) +{ + return subprog_ctx_tag(ctx) + + perf_subprog_regs(ctx) + + perf_subprog_typedef(ctx) + + perf_subprog_canonical(ctx); +} + +__weak int iter_subprog_void(void *ctx __arg_ctx) +{ + return 0; +} + +__weak int iter_subprog_typed(struct bpf_iter__task *ctx __arg_ctx) +{ + return 0; +} + +SEC("?iter/task") +__success __log_level(2) +int arg_tag_ctx_iter_task(struct bpf_iter__task *ctx) +{ + return (iter_subprog_void(ctx) + iter_subprog_typed(ctx)) & 1; +} + +__weak int tracing_subprog_void(void *ctx __arg_ctx) +{ + return 0; +} + +__weak int tracing_subprog_u64(u64 *ctx __arg_ctx) +{ + return 0; +} + +int acc; 
+ +SEC("?fentry/" SYS_PREFIX "sys_nanosleep") +__success __log_level(2) +int BPF_PROG(arg_tag_ctx_fentry) +{ + acc += tracing_subprog_void(ctx) + tracing_subprog_u64(ctx); + return 0; +} + +SEC("?fexit/" SYS_PREFIX "sys_nanosleep") +__success __log_level(2) +int BPF_PROG(arg_tag_ctx_fexit) +{ + acc += tracing_subprog_void(ctx) + tracing_subprog_u64(ctx); + return 0; +} + +SEC("?fmod_ret/" SYS_PREFIX "sys_nanosleep") +__success __log_level(2) +int BPF_PROG(arg_tag_ctx_fmod_ret) +{ + return tracing_subprog_void(ctx) + tracing_subprog_u64(ctx); +} + +SEC("?lsm/bpf") +__success __log_level(2) +int BPF_PROG(arg_tag_ctx_lsm) +{ + return tracing_subprog_void(ctx) + tracing_subprog_u64(ctx); +} + +SEC("?struct_ops/test_1") +__success __log_level(2) +int BPF_PROG(arg_tag_ctx_struct_ops) +{ + return tracing_subprog_void(ctx) + tracing_subprog_u64(ctx); +} + +SEC(".struct_ops") +struct bpf_dummy_ops dummy_1 = { + .test_1 = (void *)arg_tag_ctx_struct_ops, +}; + +SEC("?syscall") +__success __log_level(2) +int arg_tag_ctx_syscall(void *ctx) +{ + return tracing_subprog_void(ctx) + tracing_subprog_u64(ctx) + tp_whatever(ctx); } __weak int subprog_dynptr(struct bpf_dynptr *dptr) diff --git a/tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c b/tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c index 71814a7532160638b1f345d42fcfcef4c62f8182..a9ab37d3b9e2df323d702f44705b1117bc443780 100644 --- a/tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c +++ b/tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c @@ -146,4 +146,23 @@ l0_%=: exit; \ : __clobber_all); } +SEC("flow_dissector") +__description("flow_keys illegal alu op with variable offset") +__failure __msg("R7 pointer arithmetic on flow_keys prohibited") +__naked void flow_keys_illegal_variable_offset_alu(void) +{ + asm volatile(" \ + r6 = r1; \ + r7 = *(u64*)(r6 + %[flow_keys_off]); \ + r8 = 8; \ + r8 /= 1; \ + r8 &= 8; \ + r7 += r8; \ + r0 = *(u64*)(r7 + 0); \ + exit; \ +" : + : 
__imm_const(flow_keys_off, offsetof(struct __sk_buff, flow_keys)) + : __clobber_all); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/drivers/net/bonding/config b/tools/testing/selftests/drivers/net/bonding/config index 70638fa50b2cc872747bd9cdd34a1a111251dc97..899d7fb6ea8e906942cdac62ae61c64def72248d 100644 --- a/tools/testing/selftests/drivers/net/bonding/config +++ b/tools/testing/selftests/drivers/net/bonding/config @@ -1,2 +1,10 @@ CONFIG_BONDING=y +CONFIG_BRIDGE=y +CONFIG_DUMMY=y +CONFIG_IPV6=y CONFIG_MACVLAN=y +CONFIG_NET_ACT_GACT=y +CONFIG_NET_CLS_FLOWER=y +CONFIG_NET_SCH_INGRESS=y +CONFIG_NLMON=y +CONFIG_VETH=y diff --git a/tools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh b/tools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh index ad4c845a4ac7c2ae8cf028c2d0b2077a59befd09..b76bf50309524a6e1b59340f9b6370ed6d484e6d 100755 --- a/tools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh +++ b/tools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # Regression Test: diff --git a/tools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh b/tools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh index 2330d37453f956eb720f452fd12cdfd1cdc5aebc..8c2619002147915bfc03cf20fc56a257769895e9 100755 --- a/tools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh +++ b/tools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # Regression Test: diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh index 42ce602d8d492e5fb64b305f0a28149df87b28b0..0f0f4f05807c92076dcf26a57e3229d45bf90a84 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh 
@@ -40,7 +40,6 @@ # | + $swp1 $swp3 + + $swp4 | # | | iPOOL1 iPOOL0 | | iPOOL2 | # | | ePOOL4 ePOOL5 | | ePOOL4 | -# | | 1Gbps | | 1Gbps | # | | PFC:enabled=1 | | PFC:enabled=1 | # | +-|----------------------|-+ +-|------------------------+ | # | | + $swp1.111 $swp3.111 + | | + $swp4.111 | | @@ -120,6 +119,9 @@ h2_destroy() switch_create() { + local lanes_swp4 + local pg1_size + # pools # ----- @@ -229,7 +231,20 @@ switch_create() dcb pfc set dev $swp4 prio-pfc all:off 1:on # PG0 will get autoconfigured to Xoff, give PG1 arbitrarily 100K, which # is (-2*MTU) about 80K of delay provision. - dcb buffer set dev $swp4 buffer-size all:0 1:$_100KB + pg1_size=$_100KB + + setup_wait_dev_with_timeout $swp4 + + lanes_swp4=$(ethtool $swp4 | grep 'Lanes:') + lanes_swp4=${lanes_swp4#*"Lanes: "} + + # 8-lane ports use two buffers among which the configured buffer + # is split, so double the size to get twice (20K + 80K). + if [[ $lanes_swp4 -eq 8 ]]; then + pg1_size=$((pg1_size * 2)) + fi + + dcb buffer set dev $swp4 buffer-size all:0 1:$pg1_size # bridges # ------- diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh index fb850e0ec8375f143c8da0459ee48ca778bb3a29..616d3581419ca043fc715f9d067341c89513f6eb 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh @@ -10,7 +10,8 @@ lib_dir=$(dirname $0)/../../../../net/forwarding ALL_TESTS="single_mask_test identical_filters_test two_masks_test \ multiple_masks_test ctcam_edge_cases_test delta_simple_test \ delta_two_masks_one_key_test delta_simple_rehash_test \ - bloom_simple_test bloom_complex_test bloom_delta_test" + bloom_simple_test bloom_complex_test bloom_delta_test \ + max_erp_entries_test max_group_size_test" NUM_NETIFS=2 source $lib_dir/lib.sh source $lib_dir/tc_common.sh @@ -983,6 +984,109 @@ bloom_delta_test() log_test "bloom delta 
test ($tcflags)" } +max_erp_entries_test() +{ + # The number of eRP entries is limited. Once the maximum number of eRPs + # has been reached, filters cannot be added. This test verifies that + # when this limit is reached, insertion fails without crashing. + + RET=0 + + local num_masks=32 + local num_regions=15 + local chain_failed + local mask_failed + local ret + + if [[ "$tcflags" != "skip_sw" ]]; then + return 0; + fi + + for ((i=1; i < $num_regions; i++)); do + for ((j=$num_masks; j >= 0; j--)); do + tc filter add dev $h2 ingress chain $i protocol ip \ + pref $i handle $j flower $tcflags \ + dst_ip 192.1.0.0/$j &> /dev/null + ret=$? + + if [ $ret -ne 0 ]; then + chain_failed=$i + mask_failed=$j + break 2 + fi + done + done + + # We expect to exceed the maximum number of eRP entries, so that + # insertion eventually fails. Otherwise, the test should be adjusted to + # add more filters. + check_fail $ret "expected to exceed number of eRP entries" + + for ((; i >= 1; i--)); do + for ((j=0; j <= $num_masks; j++)); do + tc filter del dev $h2 ingress chain $i protocol ip \ + pref $i handle $j flower &> /dev/null + done + done + + log_test "max eRP entries test ($tcflags). " \ + "max chain $chain_failed, mask $mask_failed" +} + +max_group_size_test() +{ + # The number of ACLs in an ACL group is limited. Once the maximum + # number of ACLs has been reached, filters cannot be added. This test + # verifies that when this limit is reached, insertion fails without + # crashing. 
+ + RET=0 + + local num_acls=32 + local max_size + local ret + + if [[ "$tcflags" != "skip_sw" ]]; then + return 0; + fi + + for ((i=1; i < $num_acls; i++)); do + if [[ $(( i % 2 )) == 1 ]]; then + tc filter add dev $h2 ingress pref $i proto ipv4 \ + flower $tcflags dst_ip 198.51.100.1/32 \ + ip_proto tcp tcp_flags 0x01/0x01 \ + action drop &> /dev/null + else + tc filter add dev $h2 ingress pref $i proto ipv6 \ + flower $tcflags dst_ip 2001:db8:1::1/128 \ + action drop &> /dev/null + fi + + ret=$? + [[ $ret -ne 0 ]] && max_size=$((i - 1)) && break + done + + # We expect to exceed the maximum number of ACLs in a group, so that + # insertion eventually fails. Otherwise, the test should be adjusted to + # add more filters. + check_fail $ret "expected to exceed number of ACLs in a group" + + for ((; i >= 1; i--)); do + if [[ $(( i % 2 )) == 1 ]]; then + tc filter del dev $h2 ingress pref $i proto ipv4 \ + flower $tcflags dst_ip 198.51.100.1/32 \ + ip_proto tcp tcp_flags 0x01/0x01 \ + action drop &> /dev/null + else + tc filter del dev $h2 ingress pref $i proto ipv6 \ + flower $tcflags dst_ip 2001:db8:1::1/128 \ + action drop &> /dev/null + fi + done + + log_test "max ACL group size test ($tcflags). 
max size $max_size" +} + setup_prepare() { h1=${NETIFS[p1]} diff --git a/tools/testing/selftests/drivers/net/netdevsim/config b/tools/testing/selftests/drivers/net/netdevsim/config new file mode 100644 index 0000000000000000000000000000000000000000..adf45a3a78b41eefff9f620c84a863724418be78 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/config @@ -0,0 +1,10 @@ +CONFIG_DUMMY=y +CONFIG_GENEVE=m +CONFIG_IPV6=y +CONFIG_NETDEVSIM=m +CONFIG_NET_SCH_MQPRIO=y +CONFIG_NET_SCH_MULTIQ=y +CONFIG_NET_SCH_PRIO=y +CONFIG_PSAMPLE=y +CONFIG_PTP_1588_CLOCK_MOCK=y +CONFIG_VXLAN=m diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh index 922744059aaa24527c34a75bda18eb2a304cfbc4..80160579e0cc1ec30accfccb17b7bfa4c3937b51 100644 --- a/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh @@ -51,6 +51,7 @@ function make_netdev { fi echo $NSIM_ID $@ > /sys/bus/netdevsim/new_device + udevadm settle # get new device name ls /sys/bus/netdevsim/devices/netdevsim${NSIM_ID}/net/ } diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh index 0c56746e9ce0e649b8f6a15477ce972dca04326a..7d7829f57550d8345c3546ecd0994f819969440b 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh @@ -8,16 +8,20 @@ NSIM_NETDEV=$(make_netdev) set -o pipefail +# Since commit 2b3ddcb35357 ("ethtool: fec: Change the prompt ...") +# in ethtool CLI the Configured lines start with Supported/Configured. +configured=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2 | head -1 | cut -d' ' -f1) + # netdevsim starts out with None/None s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2) -check $? "$s" "Configured FEC encodings: None +check $? 
"$s" "$configured FEC encodings: None Active FEC encoding: None" # Test Auto $ETHTOOL --set-fec $NSIM_NETDEV encoding auto check $? s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2) -check $? "$s" "Configured FEC encodings: Auto +check $? "$s" "$configured FEC encodings: Auto Active FEC encoding: Off" # Test case in-sensitivity @@ -25,7 +29,7 @@ for o in off Off OFF; do $ETHTOOL --set-fec $NSIM_NETDEV encoding $o check $? s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2) - check $? "$s" "Configured FEC encodings: Off + check $? "$s" "$configured FEC encodings: Off Active FEC encoding: Off" done @@ -33,7 +37,7 @@ for o in BaseR baser BAser; do $ETHTOOL --set-fec $NSIM_NETDEV encoding $o check $? s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2) - check $? "$s" "Configured FEC encodings: BaseR + check $? "$s" "$configured FEC encodings: BaseR Active FEC encoding: BaseR" done @@ -41,7 +45,7 @@ for o in llrs rs; do $ETHTOOL --set-fec $NSIM_NETDEV encoding $o check $? s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2) - check $? "$s" "Configured FEC encodings: ${o^^} + check $? "$s" "$configured FEC encodings: ${o^^} Active FEC encoding: ${o^^}" done @@ -49,13 +53,13 @@ done $ETHTOOL --set-fec $NSIM_NETDEV encoding rs llrs check $? s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2) -check $? "$s" "Configured FEC encodings: RS LLRS +check $? "$s" "$configured FEC encodings: RS LLRS Active FEC encoding: LLRS" $ETHTOOL --set-fec $NSIM_NETDEV encoding rs off auto check $? s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2) -check $? "$s" "Configured FEC encodings: Auto Off RS +check $? 
"$s" "$configured FEC encodings: Auto Off RS Active FEC encoding: RS" # Make sure other link modes are rejected diff --git a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh index 1b08e042cf942a126626bbd3cad88152633f68a9..4855ef597a152135979694fb3e9145f1db4e8bcf 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh @@ -233,6 +233,7 @@ function print_tables { function get_netdev_name { local -n old=$1 + udevadm settle new=$(ls /sys/class/net) for netdev in $new; do diff --git a/tools/testing/selftests/ftrace/test.d/00basic/ringbuffer_subbuf_size.tc b/tools/testing/selftests/ftrace/test.d/00basic/ringbuffer_subbuf_size.tc new file mode 100644 index 0000000000000000000000000000000000000000..d44d09a33a74397bca5a609a18b2db718f88e4dd --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/00basic/ringbuffer_subbuf_size.tc @@ -0,0 +1,95 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Change the ringbuffer sub-buffer size +# requires: buffer_subbuf_size_kb +# flags: instance + +get_buffer_data_size() { + sed -ne 's/^.*data.*size:\([0-9][0-9]*\).*/\1/p' events/header_page +} + +get_buffer_data_offset() { + sed -ne 's/^.*data.*offset:\([0-9][0-9]*\).*/\1/p' events/header_page +} + +get_event_header_size() { + type_len=`sed -ne 's/^.*type_len.*:[^0-9]*\([0-9][0-9]*\).*/\1/p' events/header_event` + time_len=`sed -ne 's/^.*time_delta.*:[^0-9]*\([0-9][0-9]*\).*/\1/p' events/header_event` + array_len=`sed -ne 's/^.*array.*:[^0-9]*\([0-9][0-9]*\).*/\1/p' events/header_event` + total_bits=$((type_len+time_len+array_len)) + total_bits=$((total_bits+7)) + echo $((total_bits/8)) +} + +get_print_event_buf_offset() { + sed -ne 's/^.*buf.*offset:\([0-9][0-9]*\).*/\1/p' events/ftrace/print/format +} + +event_header_size=`get_event_header_size` +print_header_size=`get_print_event_buf_offset` + 
+data_offset=`get_buffer_data_offset` + +marker_meta=$((event_header_size+print_header_size)) + +make_str() { + cnt=$1 + printf -- 'X%.0s' $(seq $cnt) +} + +write_buffer() { + size=$1 + + str=`make_str $size` + + # clear the buffer + echo > trace + + # write the string into the marker + echo $str > trace_marker + + echo $str +} + +test_buffer() { + size_kb=$1 + page_size=$((size_kb*1024)) + + size=`get_buffer_data_size` + + # the size must be greater than or equal to page_size - data_offset + page_size=$((page_size-data_offset)) + if [ $size -lt $page_size ]; then + exit fail + fi + + # Now add a little more the meta data overhead will overflow + + str=`write_buffer $size` + + # Make sure the line was broken + new_str=`awk ' /tracing_mark_write:/ { sub(/^.*tracing_mark_write: /,"");printf "%s", $0; exit}' trace` + + if [ "$new_str" = "$str" ]; then + exit fail; + fi + + # Make sure the entire line can be found + new_str=`awk ' /tracing_mark_write:/ { sub(/^.*tracing_mark_write: /,"");printf "%s", $0; }' trace` + + if [ "$new_str" != "$str" ]; then + exit fail; + fi +} + +ORIG=`cat buffer_subbuf_size_kb` + +# Could test bigger sizes than 32K, but then creating the string +# to write into the ring buffer takes too long +for a in 4 8 16 32 ; do + echo $a > buffer_subbuf_size_kb + test_buffer $a +done + +echo $ORIG > buffer_subbuf_size_kb + diff --git a/tools/testing/selftests/ftrace/test.d/00basic/trace_marker.tc b/tools/testing/selftests/ftrace/test.d/00basic/trace_marker.tc new file mode 100644 index 0000000000000000000000000000000000000000..9aa0db2b84fc5d20ecaa017853ea5e5d5b39d018 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/00basic/trace_marker.tc @@ -0,0 +1,82 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Basic tests on writing to trace_marker +# requires: trace_marker +# flags: instance + +get_buffer_data_size() { + sed -ne 's/^.*data.*size:\([0-9][0-9]*\).*/\1/p' events/header_page +} + +get_buffer_data_offset() { + sed -ne 
's/^.*data.*offset:\([0-9][0-9]*\).*/\1/p' events/header_page +} + +get_event_header_size() { + type_len=`sed -ne 's/^.*type_len.*:[^0-9]*\([0-9][0-9]*\).*/\1/p' events/header_event` + time_len=`sed -ne 's/^.*time_delta.*:[^0-9]*\([0-9][0-9]*\).*/\1/p' events/header_event` + array_len=`sed -ne 's/^.*array.*:[^0-9]*\([0-9][0-9]*\).*/\1/p' events/header_event` + total_bits=$((type_len+time_len+array_len)) + total_bits=$((total_bits+7)) + echo $((total_bits/8)) +} + +get_print_event_buf_offset() { + sed -ne 's/^.*buf.*offset:\([0-9][0-9]*\).*/\1/p' events/ftrace/print/format +} + +event_header_size=`get_event_header_size` +print_header_size=`get_print_event_buf_offset` + +data_offset=`get_buffer_data_offset` + +marker_meta=$((event_header_size+print_header_size)) + +make_str() { + cnt=$1 + # subtract two for \n\0 as marker adds these + cnt=$((cnt-2)) + printf -- 'X%.0s' $(seq $cnt) +} + +write_buffer() { + size=$1 + + str=`make_str $size` + + # clear the buffer + echo > trace + + # write the string into the marker + echo -n $str > trace_marker + + echo $str +} + +test_buffer() { + + size=`get_buffer_data_size` + oneline_size=$((size-marker_meta)) + echo size = $size + echo meta size = $marker_meta + + # Now add a little more the meta data overhead will overflow + + str=`write_buffer $size` + + # Make sure the line was broken + new_str=`awk ' /tracing_mark_write:/ { sub(/^.*tracing_mark_write: /,"");printf "%s", $0; exit}' trace` + + if [ "$new_str" = "$str" ]; then + exit fail; + fi + + # Make sure the entire line can be found + new_str=`awk ' /tracing_mark_write:/ { sub(/^.*tracing_mark_write: /,"");printf "%s", $0; }' trace` + + if [ "$new_str" != "$str" ]; then + exit fail; + fi +} + +test_buffer diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index 6ed328c863c4f13ccc9db5bcf51092bd4ce183c4..1a881e7a21d1b26ce7ad19de1cc5ea07d3773ff9 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ 
b/tools/testing/selftests/iommu/iommufd.c @@ -116,6 +116,7 @@ TEST_F(iommufd, cmd_length) TEST_LENGTH(iommu_destroy, IOMMU_DESTROY, id); TEST_LENGTH(iommu_hw_info, IOMMU_GET_HW_INFO, __reserved); TEST_LENGTH(iommu_hwpt_alloc, IOMMU_HWPT_ALLOC, __reserved); + TEST_LENGTH(iommu_hwpt_invalidate, IOMMU_HWPT_INVALIDATE, __reserved); TEST_LENGTH(iommu_ioas_alloc, IOMMU_IOAS_ALLOC, out_ioas_id); TEST_LENGTH(iommu_ioas_iova_ranges, IOMMU_IOAS_IOVA_RANGES, out_iova_alignment); @@ -271,7 +272,9 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested) struct iommu_hwpt_selftest data = { .iotlb = IOMMU_TEST_IOTLB_DEFAULT, }; + struct iommu_hwpt_invalidate_selftest inv_reqs[2] = {}; uint32_t nested_hwpt_id[2] = {}; + uint32_t num_inv; uint32_t parent_hwpt_id = 0; uint32_t parent_hwpt_id_not_work = 0; uint32_t test_hwpt_id = 0; @@ -330,6 +333,10 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested) &nested_hwpt_id[1], IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data)); + test_cmd_hwpt_check_iotlb_all(nested_hwpt_id[0], + IOMMU_TEST_IOTLB_DEFAULT); + test_cmd_hwpt_check_iotlb_all(nested_hwpt_id[1], + IOMMU_TEST_IOTLB_DEFAULT); /* Negative test: a nested hwpt on top of a nested hwpt */ test_err_hwpt_alloc_nested(EINVAL, self->device_id, @@ -340,6 +347,151 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested) EXPECT_ERRNO(EBUSY, _test_ioctl_destroy(self->fd, parent_hwpt_id)); + /* hwpt_invalidate only supports a user-managed hwpt (nested) */ + num_inv = 1; + test_err_hwpt_invalidate(ENOENT, parent_hwpt_id, inv_reqs, + IOMMU_HWPT_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(!num_inv); + + /* Check data_type by passing zero-length array */ + num_inv = 0; + test_cmd_hwpt_invalidate(nested_hwpt_id[0], inv_reqs, + IOMMU_HWPT_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(!num_inv); + + /* Negative test: Invalid data_type */ + num_inv = 1; + test_err_hwpt_invalidate(EINVAL, nested_hwpt_id[0], inv_reqs, + IOMMU_HWPT_INVALIDATE_DATA_SELFTEST_INVALID, + sizeof(*inv_reqs), &num_inv); + 
assert(!num_inv); + + /* Negative test: structure size sanity */ + num_inv = 1; + test_err_hwpt_invalidate(EINVAL, nested_hwpt_id[0], inv_reqs, + IOMMU_HWPT_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs) + 1, &num_inv); + assert(!num_inv); + + num_inv = 1; + test_err_hwpt_invalidate(EINVAL, nested_hwpt_id[0], inv_reqs, + IOMMU_HWPT_INVALIDATE_DATA_SELFTEST, + 1, &num_inv); + assert(!num_inv); + + /* Negative test: invalid flag is passed */ + num_inv = 1; + inv_reqs[0].flags = 0xffffffff; + test_err_hwpt_invalidate(EOPNOTSUPP, nested_hwpt_id[0], inv_reqs, + IOMMU_HWPT_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(!num_inv); + + /* Negative test: invalid data_uptr when array is not empty */ + num_inv = 1; + inv_reqs[0].flags = 0; + test_err_hwpt_invalidate(EINVAL, nested_hwpt_id[0], NULL, + IOMMU_HWPT_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(!num_inv); + + /* Negative test: invalid entry_len when array is not empty */ + num_inv = 1; + inv_reqs[0].flags = 0; + test_err_hwpt_invalidate(EINVAL, nested_hwpt_id[0], inv_reqs, + IOMMU_HWPT_INVALIDATE_DATA_SELFTEST, + 0, &num_inv); + assert(!num_inv); + + /* Negative test: invalid iotlb_id */ + num_inv = 1; + inv_reqs[0].flags = 0; + inv_reqs[0].iotlb_id = MOCK_NESTED_DOMAIN_IOTLB_ID_MAX + 1; + test_err_hwpt_invalidate(EINVAL, nested_hwpt_id[0], inv_reqs, + IOMMU_HWPT_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(!num_inv); + + /* + * Invalidate the 1st iotlb entry but fail the 2nd request + * due to invalid flags configuration in the 2nd request. 
+ */ + num_inv = 2; + inv_reqs[0].flags = 0; + inv_reqs[0].iotlb_id = 0; + inv_reqs[1].flags = 0xffffffff; + inv_reqs[1].iotlb_id = 1; + test_err_hwpt_invalidate(EOPNOTSUPP, nested_hwpt_id[0], inv_reqs, + IOMMU_HWPT_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(num_inv == 1); + test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 0, 0); + test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 1, + IOMMU_TEST_IOTLB_DEFAULT); + test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 2, + IOMMU_TEST_IOTLB_DEFAULT); + test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 3, + IOMMU_TEST_IOTLB_DEFAULT); + + /* + * Invalidate the 1st iotlb entry but fail the 2nd request + * due to invalid iotlb_id configuration in the 2nd request. + */ + num_inv = 2; + inv_reqs[0].flags = 0; + inv_reqs[0].iotlb_id = 0; + inv_reqs[1].flags = 0; + inv_reqs[1].iotlb_id = MOCK_NESTED_DOMAIN_IOTLB_ID_MAX + 1; + test_err_hwpt_invalidate(EINVAL, nested_hwpt_id[0], inv_reqs, + IOMMU_HWPT_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(num_inv == 1); + test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 0, 0); + test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 1, + IOMMU_TEST_IOTLB_DEFAULT); + test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 2, + IOMMU_TEST_IOTLB_DEFAULT); + test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 3, + IOMMU_TEST_IOTLB_DEFAULT); + + /* Invalidate the 2nd iotlb entry and verify */ + num_inv = 1; + inv_reqs[0].flags = 0; + inv_reqs[0].iotlb_id = 1; + test_cmd_hwpt_invalidate(nested_hwpt_id[0], inv_reqs, + IOMMU_HWPT_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(num_inv == 1); + test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 0, 0); + test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 1, 0); + test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 2, + IOMMU_TEST_IOTLB_DEFAULT); + test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 3, + IOMMU_TEST_IOTLB_DEFAULT); + + /* Invalidate the 3rd and 4th iotlb entries and verify */ + num_inv = 2; + inv_reqs[0].flags = 0; + inv_reqs[0].iotlb_id = 2; + 
inv_reqs[1].flags = 0; + inv_reqs[1].iotlb_id = 3; + test_cmd_hwpt_invalidate(nested_hwpt_id[0], inv_reqs, + IOMMU_HWPT_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(num_inv == 2); + test_cmd_hwpt_check_iotlb_all(nested_hwpt_id[0], 0); + + /* Invalidate all iotlb entries for nested_hwpt_id[1] and verify */ + num_inv = 1; + inv_reqs[0].flags = IOMMU_TEST_INVALIDATE_FLAG_ALL; + test_cmd_hwpt_invalidate(nested_hwpt_id[1], inv_reqs, + IOMMU_HWPT_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(num_inv == 1); + test_cmd_hwpt_check_iotlb_all(nested_hwpt_id[1], 0); + /* Attach device to nested_hwpt_id[0] that then will be busy */ test_cmd_mock_domain_replace(self->stdev_id, nested_hwpt_id[0]); EXPECT_ERRNO(EBUSY, diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h index ad9202335656cc82e8475cf74aba72b6adf7e2b0..c646264aa41fdc1871c60bba6dc25841767f399b 100644 --- a/tools/testing/selftests/iommu/iommufd_utils.h +++ b/tools/testing/selftests/iommu/iommufd_utils.h @@ -195,6 +195,61 @@ static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, flags, \ hwpt_id, data_type, data, data_len)) +#define test_cmd_hwpt_check_iotlb(hwpt_id, iotlb_id, expected) \ + ({ \ + struct iommu_test_cmd test_cmd = { \ + .size = sizeof(test_cmd), \ + .op = IOMMU_TEST_OP_MD_CHECK_IOTLB, \ + .id = hwpt_id, \ + .check_iotlb = { \ + .id = iotlb_id, \ + .iotlb = expected, \ + }, \ + }; \ + ASSERT_EQ(0, \ + ioctl(self->fd, \ + _IOMMU_TEST_CMD(IOMMU_TEST_OP_MD_CHECK_IOTLB), \ + &test_cmd)); \ + }) + +#define test_cmd_hwpt_check_iotlb_all(hwpt_id, expected) \ + ({ \ + int i; \ + for (i = 0; i < MOCK_NESTED_DOMAIN_IOTLB_NUM; i++) \ + test_cmd_hwpt_check_iotlb(hwpt_id, i, expected); \ + }) + +static int _test_cmd_hwpt_invalidate(int fd, __u32 hwpt_id, void *reqs, + uint32_t data_type, uint32_t lreq, + uint32_t *nreqs) +{ + struct iommu_hwpt_invalidate cmd 
= { + .size = sizeof(cmd), + .hwpt_id = hwpt_id, + .data_type = data_type, + .data_uptr = (uint64_t)reqs, + .entry_len = lreq, + .entry_num = *nreqs, + }; + int rc = ioctl(fd, IOMMU_HWPT_INVALIDATE, &cmd); + *nreqs = cmd.entry_num; + return rc; +} + +#define test_cmd_hwpt_invalidate(hwpt_id, reqs, data_type, lreq, nreqs) \ + ({ \ + ASSERT_EQ(0, \ + _test_cmd_hwpt_invalidate(self->fd, hwpt_id, reqs, \ + data_type, lreq, nreqs)); \ + }) +#define test_err_hwpt_invalidate(_errno, hwpt_id, reqs, data_type, lreq, \ + nreqs) \ + ({ \ + EXPECT_ERRNO(_errno, _test_cmd_hwpt_invalidate( \ + self->fd, hwpt_id, reqs, \ + data_type, lreq, nreqs)); \ + }) + static int _test_cmd_access_replace_ioas(int fd, __u32 access_id, unsigned int ioas_id) { diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh old mode 100755 new mode 100644 diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index a10a32952f2167d4042dfbfa54a859770a22ff48..4667d74579d135eb74d701d79baa3036c88f2635 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -28,6 +28,7 @@ ALL_TESTS=" kci_test_neigh_get kci_test_bridge_parent_id kci_test_address_proto + kci_test_enslave_bonding " devdummy="test-dummy0" @@ -1241,6 +1242,31 @@ kci_test_address_proto() return $ret } +kci_test_enslave_bonding() +{ + local bond="bond123" + local ret=0 + + setup_ns testns + if [ $? 
-ne 0 ]; then + end_test "SKIP bonding tests: cannot add net namespace $testns" + return $ksft_skip + fi + + run_cmd ip -netns $testns link add dev $bond type bond mode balance-rr + run_cmd ip -netns $testns link add dev $devdummy type dummy + run_cmd ip -netns $testns link set dev $devdummy up + run_cmd ip -netns $testns link set dev $devdummy master $bond down + if [ $ret -ne 0 ]; then + end_test "FAIL: initially up interface added to a bond and set down" + ip netns del "$testns" + return 1 + fi + + end_test "PASS: enslave interface in a bond" + ip netns del "$testns" +} + kci_test_rtnl() { local current_test diff --git a/tools/testing/selftests/net/tcp_ao/Makefile b/tools/testing/selftests/net/tcp_ao/Makefile index 8e60bae67aa9f22f59d2251a32e32e62cad4c84c..522d991e310ebf0277dd8f576c7f56e89d229700 100644 --- a/tools/testing/selftests/net/tcp_ao/Makefile +++ b/tools/testing/selftests/net/tcp_ao/Makefile @@ -52,5 +52,5 @@ $(OUTPUT)/%_ipv6: %.c $(OUTPUT)/icmps-accept_ipv4: CFLAGS+= -DTEST_ICMPS_ACCEPT $(OUTPUT)/icmps-accept_ipv6: CFLAGS+= -DTEST_ICMPS_ACCEPT -$(OUTPUT)/bench-lookups_ipv4: LDFLAGS+= -lm -$(OUTPUT)/bench-lookups_ipv6: LDFLAGS+= -lm +$(OUTPUT)/bench-lookups_ipv4: LDLIBS+= -lm +$(OUTPUT)/bench-lookups_ipv6: LDLIBS+= -lm diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 464853a7f98290ec3a7c6bf69786cf8c12e1f789..7799e042a9719cda33ea7d004d2ae4a2ec608a4f 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -707,6 +707,20 @@ TEST_F(tls, splice_from_pipe) EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); } +TEST_F(tls, splice_more) +{ + unsigned int f = SPLICE_F_NONBLOCK | SPLICE_F_MORE | SPLICE_F_GIFT; + int send_len = TLS_PAYLOAD_MAX_LEN; + char mem_send[TLS_PAYLOAD_MAX_LEN]; + int i, send_pipe = 1; + int p[2]; + + ASSERT_GE(pipe(p), 0); + EXPECT_GE(write(p[1], mem_send, send_len), 0); + for (i = 0; i < 32; i++) + EXPECT_EQ(splice(p[0], NULL, self->fd, NULL, send_pipe, f), 1); +} 
+ TEST_F(tls, splice_from_pipe2) { int send_len = 16000; diff --git a/tools/testing/selftests/riscv/hwprobe/cbo.c b/tools/testing/selftests/riscv/hwprobe/cbo.c index 50a2cc8aef387cacc2444d5fa7264c8953e5d0b8..c537d52fafc586d5644ca6f7ec13a4358b4051c0 100644 --- a/tools/testing/selftests/riscv/hwprobe/cbo.c +++ b/tools/testing/selftests/riscv/hwprobe/cbo.c @@ -36,16 +36,14 @@ static void sigill_handler(int sig, siginfo_t *info, void *context) regs[0] += 4; } -static void cbo_insn(char *base, int fn) -{ - uint32_t insn = MK_CBO(fn); - - asm volatile( - "mv a0, %0\n" - "li a1, %1\n" - ".4byte %2\n" - : : "r" (base), "i" (fn), "i" (insn) : "a0", "a1", "memory"); -} +#define cbo_insn(base, fn) \ +({ \ + asm volatile( \ + "mv a0, %0\n" \ + "li a1, %1\n" \ + ".4byte %2\n" \ + : : "r" (base), "i" (fn), "i" (MK_CBO(fn)) : "a0", "a1", "memory"); \ +}) static void cbo_inval(char *base) { cbo_insn(base, 0); } static void cbo_clean(char *base) { cbo_insn(base, 1); } @@ -97,7 +95,7 @@ static void test_zicboz(void *arg) block_size = pair.value; ksft_test_result(rc == 0 && pair.key == RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE && is_power_of_2(block_size), "Zicboz block size\n"); - ksft_print_msg("Zicboz block size: %ld\n", block_size); + ksft_print_msg("Zicboz block size: %llu\n", block_size); illegal_insn = false; cbo_zero(&mem[block_size]); @@ -121,7 +119,7 @@ static void test_zicboz(void *arg) for (j = 0; j < block_size; ++j) { if (mem[i * block_size + j] != expected) { ksft_test_result_fail("cbo.zero check\n"); - ksft_print_msg("cbo.zero check: mem[%d] != 0x%x\n", + ksft_print_msg("cbo.zero check: mem[%llu] != 0x%x\n", i * block_size + j, expected); return; } @@ -201,7 +199,7 @@ int main(int argc, char **argv) pair.key = RISCV_HWPROBE_KEY_IMA_EXT_0; rc = riscv_hwprobe(&pair, 1, sizeof(cpu_set_t), (unsigned long *)&cpus, 0); if (rc < 0) - ksft_exit_fail_msg("hwprobe() failed with %d\n", rc); + ksft_exit_fail_msg("hwprobe() failed with %ld\n", rc); assert(rc == 0 && pair.key == 
RISCV_HWPROBE_KEY_IMA_EXT_0); if (pair.value & RISCV_HWPROBE_EXT_ZICBOZ) { diff --git a/tools/testing/selftests/riscv/hwprobe/hwprobe.c b/tools/testing/selftests/riscv/hwprobe/hwprobe.c index d53e0889b59e1e148f033501b4ba48176a2cb81b..fd73c87804f348ff9a80a2b7f13d67bbd16903da 100644 --- a/tools/testing/selftests/riscv/hwprobe/hwprobe.c +++ b/tools/testing/selftests/riscv/hwprobe/hwprobe.c @@ -29,7 +29,7 @@ int main(int argc, char **argv) /* Fail if the kernel claims not to recognize a base key. */ if ((i < 4) && (pairs[i].key != i)) ksft_exit_fail_msg("Failed to recognize base key: key != i, " - "key=%ld, i=%ld\n", pairs[i].key, i); + "key=%lld, i=%ld\n", pairs[i].key, i); if (pairs[i].key != RISCV_HWPROBE_KEY_BASE_BEHAVIOR) continue; @@ -37,7 +37,7 @@ int main(int argc, char **argv) if (pairs[i].value & RISCV_HWPROBE_BASE_BEHAVIOR_IMA) continue; - ksft_exit_fail_msg("Unexpected pair: (%ld, %ld)\n", pairs[i].key, pairs[i].value); + ksft_exit_fail_msg("Unexpected pair: (%lld, %llu)\n", pairs[i].key, pairs[i].value); } out = riscv_hwprobe(pairs, 8, 0, 0, 0); diff --git a/tools/testing/selftests/riscv/mm/mmap_test.h b/tools/testing/selftests/riscv/mm/mmap_test.h index 9b8434f62f570d472871641f8ec0c351fc48b3fb..2e0db9c5be6c334f9ed7d0187fae6ed6de950745 100644 --- a/tools/testing/selftests/riscv/mm/mmap_test.h +++ b/tools/testing/selftests/riscv/mm/mmap_test.h @@ -18,6 +18,8 @@ struct addresses { int *on_56_addr; }; +// Only works on 64 bit +#if __riscv_xlen == 64 static inline void do_mmaps(struct addresses *mmap_addresses) { /* @@ -50,6 +52,7 @@ static inline void do_mmaps(struct addresses *mmap_addresses) mmap_addresses->on_56_addr = mmap(on_56_bits, 5 * sizeof(int), prot, flags, 0, 0); } +#endif /* __riscv_xlen == 64 */ static inline int memory_layout(void) { diff --git a/tools/testing/selftests/riscv/vector/v_initval_nolibc.c b/tools/testing/selftests/riscv/vector/v_initval_nolibc.c index 
66764edb0d5268e8e2aabcb2c47aa3bfdc84033e..1dd94197da30cc5d17c3aa731e6a50b48d3569f4 100644 --- a/tools/testing/selftests/riscv/vector/v_initval_nolibc.c +++ b/tools/testing/selftests/riscv/vector/v_initval_nolibc.c @@ -27,7 +27,7 @@ int main(void) datap = malloc(MAX_VSIZE); if (!datap) { - ksft_test_result_fail("fail to allocate memory for size = %lu\n", MAX_VSIZE); + ksft_test_result_fail("fail to allocate memory for size = %d\n", MAX_VSIZE); exit(-1); } diff --git a/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c b/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c index 2c0d2b1126c1e31db76fbd722bdf311bfbaafa9d..1f9969bed2355befb50355e23625d9af8a5a5256 100644 --- a/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c +++ b/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c @@ -1,4 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only + +#include + #define THIS_PROGRAM "./vstate_exec_nolibc" int main(int argc, char **argv) diff --git a/tools/testing/selftests/riscv/vector/vstate_prctl.c b/tools/testing/selftests/riscv/vector/vstate_prctl.c index 8dcd399ef7fc9fb719863999a60b26a538605877..27668fb3b6d08209b8c6a98dec01d6935941b47e 100644 --- a/tools/testing/selftests/riscv/vector/vstate_prctl.c +++ b/tools/testing/selftests/riscv/vector/vstate_prctl.c @@ -60,7 +60,7 @@ int test_and_compare_child(long provided, long expected, int inherit) } rc = launch_test(inherit); if (rc != expected) { - ksft_test_result_fail("Test failed, check %d != %d\n", rc, + ksft_test_result_fail("Test failed, check %d != %ld\n", rc, expected); return -2; } @@ -79,7 +79,7 @@ int main(void) pair.key = RISCV_HWPROBE_KEY_IMA_EXT_0; rc = riscv_hwprobe(&pair, 1, 0, NULL, 0); if (rc < 0) { - ksft_test_result_fail("hwprobe() failed with %d\n", rc); + ksft_test_result_fail("hwprobe() failed with %ld\n", rc); return -1; } diff --git a/tools/testing/selftests/sgx/Makefile b/tools/testing/selftests/sgx/Makefile index 
50aab6b57da34d0f9572f0bbc72f76f12acd6b02..867f88ce2570aef81e3283a9b6484460cfae9b68 100644 --- a/tools/testing/selftests/sgx/Makefile +++ b/tools/testing/selftests/sgx/Makefile @@ -12,14 +12,16 @@ OBJCOPY := $(CROSS_COMPILE)objcopy endif INCLUDES := -I$(top_srcdir)/tools/include -HOST_CFLAGS := -Wall -Werror -g $(INCLUDES) -fPIC -z noexecstack -ENCL_CFLAGS := -Wall -Werror -static -nostdlib -nostartfiles -fPIC \ +HOST_CFLAGS := -Wall -Werror -g $(INCLUDES) -fPIC +HOST_LDFLAGS := -z noexecstack -lcrypto +ENCL_CFLAGS += -Wall -Werror -static-pie -nostdlib -ffreestanding -fPIE \ -fno-stack-protector -mrdrnd $(INCLUDES) +ENCL_LDFLAGS := -Wl,-T,test_encl.lds,--build-id=none +ifeq ($(CAN_BUILD_X86_64), 1) TEST_CUSTOM_PROGS := $(OUTPUT)/test_sgx TEST_FILES := $(OUTPUT)/test_encl.elf -ifeq ($(CAN_BUILD_X86_64), 1) all: $(TEST_CUSTOM_PROGS) $(OUTPUT)/test_encl.elf endif @@ -28,7 +30,7 @@ $(OUTPUT)/test_sgx: $(OUTPUT)/main.o \ $(OUTPUT)/sigstruct.o \ $(OUTPUT)/call.o \ $(OUTPUT)/sign_key.o - $(CC) $(HOST_CFLAGS) -o $@ $^ -lcrypto + $(CC) $(HOST_CFLAGS) -o $@ $^ $(HOST_LDFLAGS) $(OUTPUT)/main.o: main.c $(CC) $(HOST_CFLAGS) -c $< -o $@ @@ -45,8 +47,8 @@ $(OUTPUT)/call.o: call.S $(OUTPUT)/sign_key.o: sign_key.S $(CC) $(HOST_CFLAGS) -c $< -o $@ -$(OUTPUT)/test_encl.elf: test_encl.lds test_encl.c test_encl_bootstrap.S - $(CC) $(ENCL_CFLAGS) -T $^ -o $@ -Wl,--build-id=none +$(OUTPUT)/test_encl.elf: test_encl.c test_encl_bootstrap.S + $(CC) $(ENCL_CFLAGS) $^ -o $@ $(ENCL_LDFLAGS) EXTRA_CLEAN := \ $(OUTPUT)/test_encl.elf \ diff --git a/tools/testing/selftests/sgx/defines.h b/tools/testing/selftests/sgx/defines.h index d8587c971941a8ac40f9a284fc34d2993fa0d8af..402f8787a71cc0221b1af0690efbee1c8e0bfb42 100644 --- a/tools/testing/selftests/sgx/defines.h +++ b/tools/testing/selftests/sgx/defines.h @@ -13,6 +13,8 @@ #define __aligned(x) __attribute__((__aligned__(x))) #define __packed __attribute__((packed)) +#define __used __attribute__((used)) +#define 
__section(x)__attribute__((__section__(x))) #include "../../../../arch/x86/include/asm/sgx.h" #include "../../../../arch/x86/include/asm/enclu.h" diff --git a/tools/testing/selftests/sgx/load.c b/tools/testing/selftests/sgx/load.c index 94bdeac1cf041a28e6bdea9a006f1d0d1098c330..c9f658e44de6c1b2266012d0ea908d7a9b3bb605 100644 --- a/tools/testing/selftests/sgx/load.c +++ b/tools/testing/selftests/sgx/load.c @@ -136,11 +136,11 @@ static bool encl_ioc_add_pages(struct encl *encl, struct encl_segment *seg) */ uint64_t encl_get_entry(struct encl *encl, const char *symbol) { + Elf64_Sym *symtab = NULL; + char *sym_names = NULL; Elf64_Shdr *sections; - Elf64_Sym *symtab; Elf64_Ehdr *ehdr; - char *sym_names; - int num_sym; + int num_sym = 0; int i; ehdr = encl->bin; @@ -161,6 +161,9 @@ uint64_t encl_get_entry(struct encl *encl, const char *symbol) } } + if (!symtab || !sym_names) + return 0; + for (i = 0; i < num_sym; i++) { Elf64_Sym *sym = &symtab[i]; diff --git a/tools/testing/selftests/sgx/sigstruct.c b/tools/testing/selftests/sgx/sigstruct.c index a07896a463643d3bd8e6e29c1dfc7f6b44e5f49e..d73b29becf5b01b7ea5366a085194fbb7965ed87 100644 --- a/tools/testing/selftests/sgx/sigstruct.c +++ b/tools/testing/selftests/sgx/sigstruct.c @@ -318,9 +318,9 @@ bool encl_measure(struct encl *encl) struct sgx_sigstruct *sigstruct = &encl->sigstruct; struct sgx_sigstruct_payload payload; uint8_t digest[SHA256_DIGEST_LENGTH]; + EVP_MD_CTX *ctx = NULL; unsigned int siglen; RSA *key = NULL; - EVP_MD_CTX *ctx; int i; memset(sigstruct, 0, sizeof(*sigstruct)); @@ -384,7 +384,8 @@ bool encl_measure(struct encl *encl) return true; err: - EVP_MD_CTX_destroy(ctx); + if (ctx) + EVP_MD_CTX_destroy(ctx); RSA_free(key); return false; } diff --git a/tools/testing/selftests/sgx/test_encl.c b/tools/testing/selftests/sgx/test_encl.c index c0d6397295e311499484e1332e4f7f7d05775bbb..2c4d709cce2d9151f37af2ccd243bf5c847d614e 100644 --- a/tools/testing/selftests/sgx/test_encl.c +++ 
b/tools/testing/selftests/sgx/test_encl.c @@ -5,11 +5,12 @@ #include "defines.h" /* - * Data buffer spanning two pages that will be placed first in .data - * segment. Even if not used internally the second page is needed by - * external test manipulating page permissions. + * Data buffer spanning two pages that will be placed first in the .data + * segment via the linker script. Even if not used internally the second page + * is needed by external test manipulating page permissions, so mark + * encl_buffer as "used" to make sure it is entirely preserved by the compiler. */ -static uint8_t encl_buffer[8192] = { 1 }; +static uint8_t __used __section(".data.encl_buffer") encl_buffer[8192] = { 1 }; enum sgx_enclu_function { EACCEPT = 0x5, @@ -24,10 +25,11 @@ static void do_encl_emodpe(void *_op) secinfo.flags = op->flags; asm volatile(".byte 0x0f, 0x01, 0xd7" - : + : /* no outputs */ : "a" (EMODPE), "b" (&secinfo), - "c" (op->epc_addr)); + "c" (op->epc_addr) + : "memory" /* read from secinfo pointer */); } static void do_encl_eaccept(void *_op) @@ -42,7 +44,8 @@ static void do_encl_eaccept(void *_op) : "=a" (rax) : "a" (EACCEPT), "b" (&secinfo), - "c" (op->epc_addr)); + "c" (op->epc_addr) + : "memory" /* read from secinfo pointer */); op->ret = rax; } @@ -119,21 +122,41 @@ static void do_encl_op_nop(void *_op) } +/* + * Symbol placed at the start of the enclave image by the linker script. + * Declare this extern symbol with visibility "hidden" to ensure the compiler + * does not access it through the GOT and generates position-independent + * addressing as __encl_base(%rip), so we can get the actual enclave base + * during runtime. 
+ */ +extern const uint8_t __attribute__((visibility("hidden"))) __encl_base; + +typedef void (*encl_op_t)(void *); +static const encl_op_t encl_op_array[ENCL_OP_MAX] = { + do_encl_op_put_to_buf, + do_encl_op_get_from_buf, + do_encl_op_put_to_addr, + do_encl_op_get_from_addr, + do_encl_op_nop, + do_encl_eaccept, + do_encl_emodpe, + do_encl_init_tcs_page, +}; + void encl_body(void *rdi, void *rsi) { - const void (*encl_op_array[ENCL_OP_MAX])(void *) = { - do_encl_op_put_to_buf, - do_encl_op_get_from_buf, - do_encl_op_put_to_addr, - do_encl_op_get_from_addr, - do_encl_op_nop, - do_encl_eaccept, - do_encl_emodpe, - do_encl_init_tcs_page, - }; - - struct encl_op_header *op = (struct encl_op_header *)rdi; - - if (op->type < ENCL_OP_MAX) - (*encl_op_array[op->type])(op); + struct encl_op_header *header = (struct encl_op_header *)rdi; + encl_op_t op; + + if (header->type >= ENCL_OP_MAX) + return; + + /* + * The enclave base address needs to be added, as this call site + * *cannot be* made rip-relative by the compiler, or fixed up by + * any other possible means. + */ + op = ((uint64_t)&__encl_base) + encl_op_array[header->type]; + + (*op)(header); } diff --git a/tools/testing/selftests/sgx/test_encl.lds b/tools/testing/selftests/sgx/test_encl.lds index a1ec64f7d91fc52bc8a8971fd64f261790a73cf1..ffe851a1cac4063135edb5e0ae46a3c80dcb5edb 100644 --- a/tools/testing/selftests/sgx/test_encl.lds +++ b/tools/testing/selftests/sgx/test_encl.lds @@ -10,6 +10,7 @@ PHDRS SECTIONS { . 
= 0; + __encl_base = .; .tcs : { *(.tcs*) } : tcs @@ -23,6 +24,7 @@ SECTIONS } : text .data : { + *(.data.encl_buffer) *(.data*) } : data @@ -31,11 +33,9 @@ SECTIONS *(.note*) *(.debug*) *(.eh_frame*) + *(.dyn*) + *(.gnu.hash) } } -ASSERT(!DEFINED(.altinstructions), "ALTERNATIVES are not supported in enclaves") -ASSERT(!DEFINED(.altinstr_replacement), "ALTERNATIVES are not supported in enclaves") -ASSERT(!DEFINED(.discard.retpoline_safe), "RETPOLINE ALTERNATIVES are not supported in enclaves") -ASSERT(!DEFINED(.discard.nospec), "RETPOLINE ALTERNATIVES are not supported in enclaves") -ASSERT(!DEFINED(.got.plt), "Libcalls are not supported in enclaves") +ASSERT(!DEFINED(_GLOBAL_OFFSET_TABLE_), "Libcalls through GOT are not supported in enclaves") diff --git a/tools/testing/selftests/sgx/test_encl_bootstrap.S b/tools/testing/selftests/sgx/test_encl_bootstrap.S index 03ae0f57e29d0ef1f6ad7f963d78f4a09e0fbeac..d8c4ac94e032c9bef82d827ac37f00323ac0ab6c 100644 --- a/tools/testing/selftests/sgx/test_encl_bootstrap.S +++ b/tools/testing/selftests/sgx/test_encl_bootstrap.S @@ -42,9 +42,12 @@ encl_entry: # RBX contains the base address for TCS, which is the first address # inside the enclave for TCS #1 and one page into the enclave for - # TCS #2. By adding the value of encl_stack to it, we get - # the absolute address for the stack. - lea (encl_stack)(%rbx), %rax + # TCS #2. First make it relative by subtracting __encl_base and + # then add the address of encl_stack to get the address for the stack. 
+ lea __encl_base(%rip), %rax + sub %rax, %rbx + lea encl_stack(%rip), %rax + add %rbx, %rax jmp encl_entry_core encl_dyn_entry: # Entry point for dynamically created TCS page expected to follow @@ -55,25 +58,12 @@ encl_entry_core: push %rax push %rcx # push the address after EENTER - push %rbx # push the enclave base address + # NOTE: as the selftest enclave is *not* intended for production, + # simplify the code by not initializing ABI registers on entry or + # cleansing caller-save registers on exit. call encl_body - pop %rbx # pop the enclave base address - - /* Clear volatile GPRs, except RAX (EEXIT function). */ - xor %rcx, %rcx - xor %rdx, %rdx - xor %rdi, %rdi - xor %rsi, %rsi - xor %r8, %r8 - xor %r9, %r9 - xor %r10, %r10 - xor %r11, %r11 - - # Reset status flags. - add %rdx, %rdx # OF = SF = AF = CF = 0; ZF = PF = 1 - # Prepare EEXIT target by popping the address of the instruction after # EENTER to RBX. pop %rbx diff --git a/usr/gen_init_cpio.c b/usr/gen_init_cpio.c index 61230532fef10f7261db75e5757a8b2c4366d2d9..edcdb8abfa31ca82e2cb506e2c7749ad8116e7e0 100644 --- a/usr/gen_init_cpio.c +++ b/usr/gen_init_cpio.c @@ -27,6 +27,7 @@ static unsigned int offset; static unsigned int ino = 721; static time_t default_mtime; +static bool do_file_mtime; static bool do_csum = false; struct file_handler { @@ -329,6 +330,7 @@ static int cpio_mkfile(const char *name, const char *location, int file; int retval; int rc = -1; + time_t mtime; int namesize; unsigned int i; uint32_t csum = 0; @@ -347,16 +349,21 @@ static int cpio_mkfile(const char *name, const char *location, goto error; } - if (buf.st_mtime > 0xffffffff) { - fprintf(stderr, "%s: Timestamp exceeds maximum cpio timestamp, clipping.\n", - location); - buf.st_mtime = 0xffffffff; - } + if (do_file_mtime) { + mtime = default_mtime; + } else { + mtime = buf.st_mtime; + if (mtime > 0xffffffff) { + fprintf(stderr, "%s: Timestamp exceeds maximum cpio timestamp, clipping.\n", + location); + mtime = 0xffffffff; + } - 
if (buf.st_mtime < 0) { - fprintf(stderr, "%s: Timestamp negative, clipping.\n", - location); - buf.st_mtime = 0; + if (mtime < 0) { + fprintf(stderr, "%s: Timestamp negative, clipping.\n", + location); + mtime = 0; + } } if (buf.st_size > 0xffffffff) { @@ -387,7 +394,7 @@ static int cpio_mkfile(const char *name, const char *location, (long) uid, /* uid */ (long) gid, /* gid */ nlinks, /* nlink */ - (long) buf.st_mtime, /* mtime */ + (long) mtime, /* mtime */ size, /* filesize */ 3, /* major */ 1, /* minor */ @@ -536,8 +543,9 @@ static void usage(const char *prog) "file /sbin/kinit /usr/src/klibc/kinit/kinit 0755 0 0\n" "\n" "<timestamp> is time in seconds since Epoch that will be used\n" - "as mtime for symlinks, special files and directories. The default\n" - "is to use the current time for these entries.\n" + "as mtime for symlinks, directories, regular and special files.\n" + "The default is to use the current time for all files, but\n" + "preserve modification time for regular files.\n" "-c: calculate and store 32-bit checksums for file data.\n", prog); } @@ -594,6 +602,7 @@ int main (int argc, char *argv[]) usage(argv[0]); exit(1); } + do_file_mtime = true; break; case 'c': do_csum = true;